Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Nov 2013 17:57:35 +0000 (09:57 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Nov 2013 17:57:35 +0000 (09:57 -0800)
Pull networking fixes from David Miller:

 1) Fix memory leaks and other issues in mwifiex driver, from Amitkumar
    Karwar.

 2) skb_segment() can choke on packets using frag lists, fix from
    Herbert Xu with help from Eric Dumazet and others.

 3) IPv4 output cached route instantiation properly handles races
    involving two threads trying to install the same route, but we
    forgot to propagate this logic to input routes as well.  Fix from
    Alexei Starovoitov.

 4) Put protections in place to make sure that recvmsg() paths never
    accidentally copy uninitialized memory back into userspace and also
    make sure that we never try to use more than sockaddr_storage for
    building the on-kernel-stack copy of a sockaddr.  Fixes from Hannes
    Frederic Sowa.

 5) R8152 driver transmit flow bug fixes from Hayes Wang.

 6) Fix some minor fallouts from genetlink changes, from Johannes Berg
    and Michael Opdenacker.

 7) AF_PACKET sendmsg path can race with netdevice unregister notifier,
    fix by using RCU to make sure the network device doesn't go away
    from under us.  Fix from Daniel Borkmann.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (43 commits)
  gso: handle new frag_list of frags GRO packets
  genetlink: fix genl_set_err() group ID
  genetlink: fix genlmsg_multicast() bug
  packet: fix use after free race in send path when dev is released
  xen-netback: stop the VIF thread before unbinding IRQs
  wimax: remove dead code
  net/phy: Add the autocross feature for forced links on VSC82x4
  net/phy: Add VSC8662 support
  net/phy: Add VSC8574 support
  net/phy: Add VSC8234 support
  net: add BUG_ON if kernel advertises msg_namelen > sizeof(struct sockaddr_storage)
  net: rework recvmsg handler msg_name and msg_namelen logic
  bridge: flush br's address entry in fdb when remove the
  net: core: Always propagate flag changes to interfaces
  ipv4: fix race in concurrent ip_route_input_slow()
  r8152: fix incorrect type in assignment
  r8152: support stopping/waking tx queue
  r8152: modify the tx flow
  r8152: fix tx/rx memory overflow
  netfilter: ebt_ip6: fix source and destination matching
  ...

469 files changed:
Documentation/assoc_array.txt [new file with mode: 0644]
Documentation/devicetree/bindings/dma/atmel-dma.txt
Documentation/devicetree/bindings/i2c/trivial-devices.txt
Documentation/devicetree/bindings/powerpc/fsl/dma.txt
Documentation/dmatest.txt
Documentation/filesystems/btrfs.txt
Documentation/kernel-parameters.txt
Documentation/power/runtime_pm.txt
Documentation/security/00-INDEX
Documentation/security/IMA-templates.txt [new file with mode: 0644]
Documentation/security/keys.txt
Documentation/vm/split_page_table_lock
MAINTAINERS
arch/alpha/Kconfig
arch/alpha/include/asm/machvec.h
arch/alpha/include/asm/pal.h
arch/alpha/include/asm/rtc.h
arch/alpha/include/asm/string.h
arch/alpha/include/uapi/asm/pal.h
arch/alpha/kernel/Makefile
arch/alpha/kernel/alpha_ksyms.c
arch/alpha/kernel/irq_alpha.c
arch/alpha/kernel/machvec_impl.h
arch/alpha/kernel/perf_event.c
arch/alpha/kernel/process.c
arch/alpha/kernel/proto.h
arch/alpha/kernel/rtc.c [new file with mode: 0644]
arch/alpha/kernel/setup.c
arch/alpha/kernel/smp.c
arch/alpha/kernel/sys_jensen.c
arch/alpha/kernel/sys_marvel.c
arch/alpha/kernel/time.c
arch/alpha/kernel/traps.c
arch/alpha/lib/csum_partial_copy.c
arch/alpha/lib/ev6-memset.S
arch/alpha/lib/memset.S
arch/arm/Kconfig
arch/arm/common/edma.c
arch/arm/include/asm/hardware/iop3xx-adma.h
arch/arm/include/asm/hardware/iop_adma.h
arch/arm/include/asm/memory.h
arch/arm/kernel/head.S
arch/arm/kernel/traps.c
arch/arm/kvm/mmu.c
arch/arm/lib/bitops.h
arch/arm/mach-iop13xx/include/mach/adma.h
arch/arm/mm/mmu.c
arch/arm/mm/nommu.c
arch/arm/mm/proc-v7.S
arch/avr32/boot/u-boot/head.S
arch/avr32/include/asm/kprobes.h
arch/avr32/include/uapi/asm/Kbuild
arch/avr32/include/uapi/asm/auxvec.h
arch/avr32/include/uapi/asm/bitsperlong.h [deleted file]
arch/avr32/include/uapi/asm/byteorder.h
arch/avr32/include/uapi/asm/cachectl.h
arch/avr32/include/uapi/asm/errno.h [deleted file]
arch/avr32/include/uapi/asm/fcntl.h [deleted file]
arch/avr32/include/uapi/asm/ioctl.h [deleted file]
arch/avr32/include/uapi/asm/ioctls.h [deleted file]
arch/avr32/include/uapi/asm/ipcbuf.h [deleted file]
arch/avr32/include/uapi/asm/kvm_para.h [deleted file]
arch/avr32/include/uapi/asm/mman.h [deleted file]
arch/avr32/include/uapi/asm/msgbuf.h
arch/avr32/include/uapi/asm/poll.h [deleted file]
arch/avr32/include/uapi/asm/posix_types.h
arch/avr32/include/uapi/asm/resource.h [deleted file]
arch/avr32/include/uapi/asm/sembuf.h
arch/avr32/include/uapi/asm/setup.h
arch/avr32/include/uapi/asm/shmbuf.h
arch/avr32/include/uapi/asm/sigcontext.h
arch/avr32/include/uapi/asm/siginfo.h [deleted file]
arch/avr32/include/uapi/asm/signal.h
arch/avr32/include/uapi/asm/socket.h
arch/avr32/include/uapi/asm/sockios.h
arch/avr32/include/uapi/asm/stat.h
arch/avr32/include/uapi/asm/statfs.h [deleted file]
arch/avr32/include/uapi/asm/swab.h
arch/avr32/include/uapi/asm/termbits.h
arch/avr32/include/uapi/asm/termios.h
arch/avr32/include/uapi/asm/types.h
arch/avr32/include/uapi/asm/unistd.h
arch/avr32/kernel/entry-avr32b.S
arch/avr32/kernel/head.S
arch/ia64/hp/common/sba_iommu.c
arch/ia64/include/asm/pci.h
arch/ia64/kernel/perfmon.c
arch/ia64/pci/pci.c
arch/ia64/sn/kernel/io_acpi_init.c
arch/parisc/include/asm/socket.h [new file with mode: 0644]
arch/parisc/include/asm/uaccess.h
arch/parisc/include/uapi/asm/socket.h
arch/parisc/lib/memcpy.c
arch/parisc/mm/fault.c
arch/powerpc/Makefile
arch/powerpc/boot/dts/fsl/b4si-post.dtsi
arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi [new file with mode: 0644]
arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi [new file with mode: 0644]
arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
arch/powerpc/configs/pseries_le_defconfig [new file with mode: 0644]
arch/powerpc/include/asm/elf.h
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/plpar_wrappers.h
arch/powerpc/include/asm/smp.h
arch/powerpc/include/asm/thread_info.h
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/eeh_event.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom.c
arch/powerpc/kernel/signal_32.c
arch/powerpc/kernel/signal_64.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/time.c
arch/powerpc/kernel/vdso64/sigtramp.S
arch/powerpc/kernel/vio.c
arch/powerpc/mm/gup.c
arch/powerpc/mm/slice.c
arch/powerpc/platforms/Kconfig.cputype
arch/powerpc/platforms/powernv/rng.c
arch/powerpc/platforms/pseries/eeh_pseries.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/rng.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/wsp/chroma.c
arch/powerpc/platforms/wsp/h8.c
arch/powerpc/platforms/wsp/ics.c
arch/powerpc/platforms/wsp/opb_pic.c
arch/powerpc/platforms/wsp/psr2.c
arch/powerpc/platforms/wsp/scom_wsp.c
arch/powerpc/platforms/wsp/wsp.c
arch/x86/include/asm/pci.h
arch/x86/include/uapi/asm/msr-index.h
arch/x86/kvm/mmu_audit.c
arch/x86/mm/pgtable.c
arch/x86/pci/acpi.c
block/blk-mq.c
block/partitions/efi.c
crypto/Kconfig
crypto/Makefile
crypto/asymmetric_keys/Kconfig
crypto/asymmetric_keys/asymmetric_type.c
crypto/asymmetric_keys/public_key.c
crypto/asymmetric_keys/public_key.h
crypto/asymmetric_keys/rsa.c
crypto/asymmetric_keys/x509_cert_parser.c
crypto/asymmetric_keys/x509_parser.h
crypto/asymmetric_keys/x509_public_key.c
crypto/async_tx/async_memcpy.c
crypto/async_tx/async_pq.c
crypto/async_tx/async_raid6_recov.c
crypto/async_tx/async_tx.c
crypto/async_tx/async_xor.c
crypto/async_tx/raid6test.c
crypto/hash_info.c [new file with mode: 0644]
drivers/acpi/Kconfig
drivers/acpi/ac.c
drivers/acpi/acpi_lpss.c
drivers/acpi/acpi_platform.c
drivers/acpi/blacklist.c
drivers/acpi/device_pm.c
drivers/acpi/ec.c
drivers/acpi/glue.c
drivers/acpi/pci_root.c
drivers/acpi/scan.c
drivers/acpi/video.c
drivers/ata/libata-acpi.c
drivers/ata/pata_arasan_cf.c
drivers/base/platform.c
drivers/base/power/main.c
drivers/block/null_blk.c
drivers/block/virtio_blk.c
drivers/char/tpm/Kconfig
drivers/char/tpm/Makefile
drivers/char/tpm/tpm-interface.c [new file with mode: 0644]
drivers/char/tpm/tpm.c [deleted file]
drivers/char/tpm/tpm.h
drivers/char/tpm/tpm_atmel.c
drivers/char/tpm/tpm_eventlog.c
drivers/char/tpm/tpm_i2c_atmel.c [new file with mode: 0644]
drivers/char/tpm/tpm_i2c_infineon.c
drivers/char/tpm/tpm_i2c_nuvoton.c [new file with mode: 0644]
drivers/char/tpm/tpm_i2c_stm_st33.c
drivers/char/tpm/tpm_ibmvtpm.c
drivers/char/tpm/tpm_ppi.c
drivers/char/tpm/tpm_tis.c
drivers/char/tpm/xen-tpmfront.c
drivers/cpufreq/cpufreq_conservative.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/omap-cpufreq.c
drivers/dma/Kconfig
drivers/dma/amba-pl08x.c
drivers/dma/at_hdmac.c
drivers/dma/coh901318.c
drivers/dma/cppi41.c
drivers/dma/dma-jz4740.c
drivers/dma/dmaengine.c
drivers/dma/dmatest.c
drivers/dma/dw/core.c
drivers/dma/edma.c
drivers/dma/ep93xx_dma.c
drivers/dma/fsldma.c
drivers/dma/fsldma.h
drivers/dma/imx-dma.c
drivers/dma/imx-sdma.c
drivers/dma/intel_mid_dma.c
drivers/dma/ioat/dma.c
drivers/dma/ioat/dma.h
drivers/dma/ioat/dma_v2.c
drivers/dma/ioat/dma_v2.h
drivers/dma/ioat/dma_v3.c
drivers/dma/ioat/pci.c
drivers/dma/iop-adma.c
drivers/dma/ipu/ipu_idmac.c
drivers/dma/k3dma.c
drivers/dma/mmp_pdma.c
drivers/dma/mmp_tdma.c
drivers/dma/mv_xor.c
drivers/dma/mv_xor.h
drivers/dma/mxs-dma.c
drivers/dma/omap-dma.c
drivers/dma/pl330.c
drivers/dma/ppc4xx/adma.c
drivers/dma/sa11x0-dma.c
drivers/dma/sh/shdma-base.c
drivers/dma/sh/shdmac.c
drivers/dma/ste_dma40.c
drivers/dma/tegra20-apb-dma.c
drivers/dma/timb_dma.c
drivers/dma/txx9dmac.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/i915/intel_acpi.c
drivers/gpu/drm/i915/intel_opregion.c
drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
drivers/gpu/drm/nouveau/nouveau_acpi.c
drivers/gpu/drm/radeon/radeon_acpi.c
drivers/gpu/drm/radeon/radeon_atpx_handler.c
drivers/gpu/drm/radeon/radeon_bios.c
drivers/hid/i2c-hid/i2c-hid.c
drivers/i2c/i2c-core.c
drivers/ide/ide-acpi.c
drivers/idle/intel_idle.c
drivers/md/md.c
drivers/md/raid1.c
drivers/md/raid1.h
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/media/platform/m2m-deinterlace.c
drivers/media/platform/timblogiw.c
drivers/misc/carma/carma-fpga.c
drivers/mmc/core/sdio_bus.c
drivers/mtd/nand/atmel_nand.c
drivers/mtd/nand/fsmc_nand.c
drivers/net/ethernet/micrel/ks8842.c
drivers/ntb/ntb_transport.c
drivers/pci/hotplug/acpi_pcihp.c
drivers/pci/hotplug/acpiphp.h
drivers/pci/hotplug/pciehp_acpi.c
drivers/pci/hotplug/sgi_hotplug.c
drivers/pci/ioapic.c
drivers/pci/pci-acpi.c
drivers/pci/pci-label.c
drivers/platform/x86/apple-gmux.c
drivers/pnp/pnpacpi/core.c
drivers/rtc/Kconfig
drivers/rtc/rtc-at91rm9200.c
drivers/spi/spi-dw-mid.c
drivers/spi/spi.c
drivers/tty/serial/sh-sci.c
drivers/usb/core/hub.c
drivers/usb/core/usb-acpi.c
drivers/xen/pci.c
fs/9p/vfs_dentry.c
fs/aio.c
fs/bio.c
fs/btrfs/Kconfig
fs/btrfs/async-thread.c
fs/btrfs/check-integrity.c
fs/btrfs/ctree.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/ordered-data.c
fs/btrfs/scrub.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/configfs/dir.c
fs/coredump.c
fs/dcache.c
fs/efivarfs/super.c
fs/exec.c
fs/gfs2/glock.c
fs/gfs2/inode.c
fs/gfs2/lock_dlm.c
fs/gfs2/quota.c
fs/gfs2/rgrp.c
fs/hostfs/hostfs_kern.c
fs/libfs.c
fs/namei.c
fs/nfsd/nfs4xdr.c
fs/nfsd/vfs.c
fs/proc/base.c
fs/proc/generic.c
fs/proc/namespaces.c
fs/squashfs/Kconfig
fs/squashfs/Makefile
fs/squashfs/block.c
fs/squashfs/cache.c
fs/squashfs/decompressor.c
fs/squashfs/decompressor.h
fs/squashfs/decompressor_multi.c [new file with mode: 0644]
fs/squashfs/decompressor_multi_percpu.c [new file with mode: 0644]
fs/squashfs/decompressor_single.c [new file with mode: 0644]
fs/squashfs/file.c
fs/squashfs/file_cache.c [new file with mode: 0644]
fs/squashfs/file_direct.c [new file with mode: 0644]
fs/squashfs/lzo_wrapper.c
fs/squashfs/page_actor.c [new file with mode: 0644]
fs/squashfs/page_actor.h [new file with mode: 0644]
fs/squashfs/squashfs.h
fs/squashfs/squashfs_fs_sb.h
fs/squashfs/super.c
fs/squashfs/xz_wrapper.c
fs/squashfs/zlib_wrapper.c
fs/xfs/xfs_bmap.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_trans_inode.c
fs/xfs/xfs_trans_resv.c
include/acpi/acpi_bus.h
include/crypto/hash_info.h [new file with mode: 0644]
include/crypto/public_key.h
include/keys/big_key-type.h [new file with mode: 0644]
include/keys/keyring-type.h
include/keys/system_keyring.h [new file with mode: 0644]
include/linux/acpi.h
include/linux/assoc_array.h [new file with mode: 0644]
include/linux/assoc_array_priv.h [new file with mode: 0644]
include/linux/audit.h
include/linux/blkdev.h
include/linux/device.h
include/linux/dmaengine.h
include/linux/fs.h
include/linux/hugetlb.h
include/linux/key-type.h
include/linux/key.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/pci-acpi.h
include/linux/platform_data/edma.h
include/linux/security.h
include/linux/seqlock.h
include/linux/slab.h
include/linux/slab_def.h
include/linux/slub_def.h
include/linux/user_namespace.h
include/linux/wait.h
include/trace/events/btrfs.h
include/uapi/linux/audit.h
include/uapi/linux/hash_info.h [new file with mode: 0644]
include/uapi/linux/keyctl.h
include/uapi/linux/raid/md_p.h
init/Kconfig
init/main.c
ipc/shm.c
kernel/Makefile
kernel/audit.c
kernel/audit.h
kernel/auditfilter.c
kernel/auditsc.c
kernel/cgroup.c
kernel/modsign_certificate.S [deleted file]
kernel/modsign_pubkey.c [deleted file]
kernel/module-internal.h
kernel/module_signing.c
kernel/power/snapshot.c
kernel/power/user.c
kernel/system_certificates.S [new file with mode: 0644]
kernel/system_keyring.c [new file with mode: 0644]
kernel/user.c
kernel/user_namespace.c
lib/Kconfig
lib/Makefile
lib/assoc_array.c [new file with mode: 0644]
lib/mpi/mpiutil.c
mm/hugetlb.c
mm/memory.c
mm/mempolicy.c
mm/migrate.c
mm/slab.c
mm/slub.c
mm/swap.c
net/Kconfig
net/ipv4/tcp.c
net/sunrpc/rpc_pipe.c
scripts/asn1_compiler.c
scripts/checkpatch.pl
security/Makefile
security/apparmor/audit.c
security/apparmor/capability.c
security/apparmor/domain.c
security/apparmor/include/audit.h
security/apparmor/include/capability.h
security/apparmor/include/ipc.h
security/apparmor/ipc.c
security/apparmor/lsm.c
security/capability.c
security/integrity/digsig.c
security/integrity/digsig_asymmetric.c
security/integrity/evm/evm_main.c
security/integrity/evm/evm_posix_acl.c
security/integrity/iint.c
security/integrity/ima/Kconfig
security/integrity/ima/Makefile
security/integrity/ima/ima.h
security/integrity/ima/ima_api.c
security/integrity/ima/ima_appraise.c
security/integrity/ima/ima_crypto.c
security/integrity/ima/ima_fs.c
security/integrity/ima/ima_init.c
security/integrity/ima/ima_main.c
security/integrity/ima/ima_policy.c
security/integrity/ima/ima_queue.c
security/integrity/ima/ima_template.c [new file with mode: 0644]
security/integrity/ima/ima_template_lib.c [new file with mode: 0644]
security/integrity/ima/ima_template_lib.h [new file with mode: 0644]
security/integrity/integrity.h
security/keys/Kconfig
security/keys/Makefile
security/keys/big_key.c [new file with mode: 0644]
security/keys/compat.c
security/keys/gc.c
security/keys/internal.h
security/keys/key.c
security/keys/keyctl.c
security/keys/keyring.c
security/keys/persistent.c [new file with mode: 0644]
security/keys/proc.c
security/keys/process_keys.c
security/keys/request_key.c
security/keys/request_key_auth.c
security/keys/sysctl.c
security/keys/user_defined.c
security/lsm_audit.c
security/security.c
security/selinux/hooks.c
security/selinux/include/objsec.h
security/selinux/include/security.h
security/selinux/include/xfrm.h
security/selinux/netlabel.c
security/selinux/netnode.c
security/selinux/nlmsgtab.c
security/selinux/selinuxfs.c
security/selinux/ss/ebitmap.c
security/selinux/ss/ebitmap.h
security/selinux/ss/mls.c
security/selinux/ss/mls_types.h
security/selinux/ss/policydb.c
security/selinux/ss/services.c
security/selinux/xfrm.c
security/smack/smack.h
security/smack/smack_access.c
security/smack/smack_lsm.c
security/smack/smackfs.c
sound/soc/davinci/davinci-pcm.c
tools/power/x86/turbostat/turbostat.c
virt/kvm/kvm_main.c

diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
new file mode 100644 (file)
index 0000000..f4faec0
--- /dev/null
@@ -0,0 +1,574 @@
+                  ========================================
+                  GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION
+                  ========================================
+
+Contents:
+
+ - Overview.
+
+ - The public API.
+   - Edit script.
+   - Operations table.
+   - Manipulation functions.
+   - Access functions.
+   - Index key form.
+
+ - Internal workings.
+   - Basic internal tree layout.
+   - Shortcuts.
+   - Splitting and collapsing nodes.
+   - Non-recursive iteration.
+   - Simultaneous alteration and iteration.
+
+
+========
+OVERVIEW
+========
+
+This associative array implementation is an object container with the following
+properties:
+
+ (1) Objects are opaque pointers.  The implementation does not care where they
+     point (if anywhere) or what they point to (if anything).
+
+     [!] NOTE: Pointers to objects _must_ be zero in the least significant bit.
+
+ (2) Objects do not need to contain linkage blocks for use by the array.  This
+     permits an object to be located in multiple arrays simultaneously.
+     Rather, the array is made up of metadata blocks that point to objects.
+
+ (3) Objects require index keys to locate them within the array.
+
+ (4) Index keys must be unique.  Inserting an object with the same key as one
+     already in the array will replace the old object.
+
+ (5) Index keys can be of any length and can be of different lengths.
+
+ (6) Index keys should encode the length early on, before any variation due to
+     length is seen.
+
+ (7) Index keys can include a hash to scatter objects throughout the array.
+
+ (8) The array can be iterated over.  The objects will not necessarily come out
+     in key order.
+
+ (9) The array can be iterated over whilst it is being modified, provided the
+     RCU readlock is being held by the iterator.  Note, however, under these
+     circumstances, some objects may be seen more than once.  If this is a
+     problem, the iterator should lock against modification.  Objects will not
+     be missed, however, unless deleted.
+
+(10) Objects in the array can be looked up by means of their index key.
+
+(11) Objects can be looked up whilst the array is being modified, provided the
+     RCU readlock is being held by the thread doing the look up.
+
+The implementation uses a tree of 16-pointer nodes internally that are indexed
+on each level by nibbles from the index key in the same manner as in a radix
+tree.  To improve memory efficiency, shortcuts can be emplaced to skip over
+what would otherwise be a series of single-occupancy nodes.  Further, nodes
+pack leaf object pointers into spare space in the node rather than making an
+extra branch until such time as an object needs to be added to a full node.
+
+
+==============
+THE PUBLIC API
+==============
+
+The public API can be found in <linux/assoc_array.h>.  The associative array is
+rooted on the following structure:
+
+       struct assoc_array {
+               ...
+       };
+
+The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY.
+
+
+EDIT SCRIPT
+-----------
+
+The insertion and deletion functions produce an 'edit script' that can later be
+applied to effect the changes without risking ENOMEM.  This retains the
+preallocated metadata blocks that will be installed in the internal tree and
+keeps track of the metadata blocks that will be removed from the tree when the
+script is applied.
+
+This is also used to keep track of dead blocks and dead objects after the
+script has been applied so that they can be freed later.  The freeing is done
+after an RCU grace period has passed - thus allowing access functions to
+proceed under the RCU read lock.
+
+The script appears outside of the API as a pointer of the type:
+
+       struct assoc_array_edit;
+
+There are two functions for dealing with the script:
+
+ (1) Apply an edit script.
+
+       void assoc_array_apply_edit(struct assoc_array_edit *edit);
+
+     This will perform the edit functions, interpolating various write barriers
+     to permit accesses under the RCU read lock to continue.  The edit script
+     will then be passed to call_rcu() to free it and any dead stuff it points
+     to.
+
+ (2) Cancel an edit script.
+
+       void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+
+     This frees the edit script and all preallocated memory immediately.  If
+     this was for insertion, the new object is _not_ released by this function,
+     but must rather be released by the caller.
+
+These functions are guaranteed not to fail.
+
+
+OPERATIONS TABLE
+----------------
+
+Various functions take a table of operations:
+
+       struct assoc_array_ops {
+               ...
+       };
+
+This points to a number of methods, all of which need to be provided:
+
+ (1) Get a chunk of index key from caller data:
+
+       unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+     This should return a chunk of caller-supplied index key starting at the
+     *bit* position given by the level argument.  The level argument will be a
+     multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return
+     ASSOC_ARRAY_KEY_CHUNK_SIZE bits.  No error is possible.
+
+
+ (2) Get a chunk of an object's index key.
+
+       unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+     As the previous function, but gets its data from an object in the array
+     rather than from a caller-supplied index key.
+
+
+ (3) See if this is the object we're looking for.
+
+       bool (*compare_object)(const void *object, const void *index_key);
+
+     Compare the object against an index key and return true if it matches and
+     false if it doesn't.
+
+
+ (4) Diff the index keys of two objects.
+
+       int (*diff_objects)(const void *a, const void *b);
+
+     Return the bit position at which the index keys of two objects differ or
+     -1 if they are the same.
+
+
+ (5) Free an object.
+
+       void (*free_object)(void *object);
+
+     Free the specified object.  Note that this may be called an RCU grace
+     period after assoc_array_apply_edit() was called, so synchronize_rcu() may
+     be necessary on module unloading.
+
+
+MANIPULATION FUNCTIONS
+----------------------
+
+There are a number of functions for manipulating an associative array:
+
+ (1) Initialise an associative array.
+
+       void assoc_array_init(struct assoc_array *array);
+
+     This initialises the base structure for an associative array.  It can't
+     fail.
+
+
+ (2) Insert/replace an object in an associative array.
+
+       struct assoc_array_edit *
+       assoc_array_insert(struct assoc_array *array,
+                          const struct assoc_array_ops *ops,
+                          const void *index_key,
+                          void *object);
+
+     This inserts the given object into the array.  Note that the least
+     significant bit of the pointer must be zero as it's used to type-mark
+     pointers internally.
+
+     If an object already exists for that key then it will be replaced with the
+     new object and the old one will be freed automatically.
+
+     The index_key argument should hold index key information and is
+     passed to the methods in the ops table when they are called.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (3) Delete an object from an associative array.
+
+       struct assoc_array_edit *
+       assoc_array_delete(struct assoc_array *array,
+                          const struct assoc_array_ops *ops,
+                          const void *index_key);
+
+     This deletes an object that matches the specified data from the array.
+
+     The index_key argument should hold index key information and is
+     passed to the methods in the ops table when they are called.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.  NULL will be returned if the specified object is
+     not found within the array.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (4) Delete all objects from an associative array.
+
+       struct assoc_array_edit *
+       assoc_array_clear(struct assoc_array *array,
+                         const struct assoc_array_ops *ops);
+
+     This deletes all the objects from an associative array and leaves it
+     completely empty.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (5) Destroy an associative array, deleting all objects.
+
+       void assoc_array_destroy(struct assoc_array *array,
+                                const struct assoc_array_ops *ops);
+
+     This destroys the contents of the associative array and leaves it
+     completely empty.  It is not permitted for another thread to be traversing
+     the array under the RCU read lock at the same time as this function is
+     destroying it as no RCU deferral is performed on memory release -
+     something that would require memory to be allocated.
+
+     The caller should lock exclusively against other modifiers and accessors
+     of the array.
+
+
+ (6) Garbage collect an associative array.
+
+       int assoc_array_gc(struct assoc_array *array,
+                          const struct assoc_array_ops *ops,
+                          bool (*iterator)(void *object, void *iterator_data),
+                          void *iterator_data);
+
+     This iterates over the objects in an associative array and passes each one
+     to iterator().  If iterator() returns true, the object is kept.  If it
+     returns false, the object will be freed.  If the iterator() function
+     returns true, it must perform any appropriate refcount incrementing on the
+     object before returning.
+
+     The internal tree will be packed down if possible as part of the iteration
+     to reduce the number of nodes in it.
+
+     The iterator_data is passed directly to iterator() and is otherwise
+     ignored by the function.
+
+     The function will return 0 if successful and -ENOMEM if there wasn't
+     enough memory.
+
+     It is possible for other threads to iterate over or search the array under
+     the RCU read lock whilst this function is in progress.  The caller should
+     lock exclusively against other modifiers of the array.
+
+
+ACCESS FUNCTIONS
+----------------
+
+There are two functions for accessing an associative array:
+
+ (1) Iterate over all the objects in an associative array.
+
+       int assoc_array_iterate(const struct assoc_array *array,
+                               int (*iterator)(const void *object,
+                                               void *iterator_data),
+                               void *iterator_data);
+
+     This passes each object in the array to the iterator callback function.
+     iterator_data is private data for that function.
+
+     This may be used on an array at the same time as the array is being
+     modified, provided the RCU read lock is held.  Under such circumstances,
+     it is possible for the iteration function to see some objects twice.  If
+     this is a problem, then modification should be locked against.  The
+     iteration algorithm should not, however, miss any objects.
+
+     The function will return 0 if no objects were in the array or else it will
+     return the result of the last iterator function called.  Iteration stops
+     immediately if any call to the iteration function results in a non-zero
+     return.
+
+
+ (2) Find an object in an associative array.
+
+       void *assoc_array_find(const struct assoc_array *array,
+                              const struct assoc_array_ops *ops,
+                              const void *index_key);
+
+     This walks through the array's internal tree directly to the object
+     specified by the index key.
+
+     This may be used on an array at the same time as the array is being
+     modified, provided the RCU read lock is held.
+
+     The function will return the object if found or will return NULL if the
+     object was not found.
+
+
+INDEX KEY FORM
+--------------
+
+The index key can be of any form, but since the algorithms aren't told how long
+the key is, it is strongly recommended that the index key includes its length
+very early on before any variation due to the length would have an effect on
+comparisons.
+
+This will cause leaves with different length keys to scatter away from each
+other - and those with the same length keys to cluster together.
+
+It is also recommended that the index key begin with a hash of the rest of the
+key to maximise scattering throughout keyspace.
+
+The better the scattering, the wider and lower the internal tree will be.
+
+Poor scattering isn't too much of a problem as there are shortcuts and nodes
+can contain mixtures of leaves and metadata pointers.
+
+The index key is read in chunks of machine word.  Each chunk is subdivided into
+one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and
+on a 64-bit CPU, 16 levels.  Unless the scattering is really poor, it is
+unlikely that more than one word of any particular index key will have to be
+used.
+
+
+=================
+INTERNAL WORKINGS
+=================
+
+The associative array data structure has an internal tree.  This tree is
+constructed of two types of metadata blocks: nodes and shortcuts.
+
+A node is an array of slots.  Each slot can contain one of four things:
+
+ (*) A NULL pointer, indicating that the slot is empty.
+
+ (*) A pointer to an object (a leaf).
+
+ (*) A pointer to a node at the next level.
+
+ (*) A pointer to a shortcut.
+
+
+BASIC INTERNAL TREE LAYOUT
+--------------------------
+
+Ignoring shortcuts for the moment, the nodes form a multilevel tree.  The index
+key space is strictly subdivided by the nodes in the tree and nodes occur on
+fixed levels.  For example:
+
+ Level:        0               1               2               3
+       =============== =============== =============== ===============
+                                                       NODE D
+                       NODE B          NODE C  +------>+---+
+               +------>+---+   +------>+---+   |       | 0 |
+       NODE A  |       | 0 |   |       | 0 |   |       +---+
+       +---+   |       +---+   |       +---+   |       :   :
+       | 0 |   |       :   :   |       :   :   |       +---+
+       +---+   |       +---+   |       +---+   |       | f |
+       | 1 |---+       | 3 |---+       | 7 |---+       +---+
+       +---+           +---+           +---+
+       :   :           :   :           | 8 |---+
+       +---+           +---+           +---+   |       NODE E
+       | e |---+       | f |           :   :   +------>+---+
+       +---+   |       +---+           +---+           | 0 |
+       | f |   |                       | f |           +---+
+       +---+   |                       +---+           :   :
+               |       NODE F                          +---+
+               +------>+---+                           | f |
+                       | 0 |           NODE G          +---+
+                       +---+   +------>+---+
+                       :   :   |       | 0 |
+                       +---+   |       +---+
+                       | 6 |---+       :   :
+                       +---+           +---+
+                       :   :           | f |
+                       +---+           +---+
+                       | f |
+                       +---+
+
+In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
+Assuming no other meta data nodes in the tree, the key space is divided thusly:
+
+       KEY PREFIX      NODE
+       ==========      ====
+       137*            D
+       138*            E
+       13[0-69-f]*     C
+       1[0-24-f]*      B
+       e6*             G
+       e[0-57-f]*      F
+       [02-df]*        A
+
+So, for instance, keys with the following example index keys will be found in
+the appropriate nodes:
+
+       INDEX KEY       PREFIX  NODE
+       =============== ======= ====
+       13694892892489  13      C
+       13795289025897  137     D
+       13889dde88793   138     E
+       138bbb89003093  138     E
+       1394879524789   13      C
+       1458952489      1       B
+       9431809de993ba  -       A
+       b4542910809cd   -       A
+       e5284310def98   e       F
+       e68428974237    e6      G
+       e7fffcbd443     e       F
+       f3842239082     -       A
+
+To save memory, if a node can hold all the leaves in its portion of keyspace,
+then the node will have all those leaves in it and will not have any metadata
+pointers - even if some of those leaves would like to be in the same slot.
+
+A node can contain a heterogeneous mix of leaves and metadata pointers.
+Metadata pointers must be in the slots that match their subdivisions of key
+space.  The leaves can be in any slot not occupied by a metadata pointer.  It
+is guaranteed that none of the leaves in a node will match a slot occupied by a
+metadata pointer.  If the metadata pointer is there, any leaf whose key matches
+the metadata key prefix must be in the subtree that the metadata pointer points
+to.
+
+In the above example list of index keys, node A will contain:
+
+       SLOT    CONTENT         INDEX KEY (PREFIX)
+       ====    =============== ==================
+       1       PTR TO NODE B   1*
+       any     LEAF            9431809de993ba
+       any     LEAF            b4542910809cd
+       e       PTR TO NODE F   e*
+       any     LEAF            f3842239082
+
+and node B:
+
+       3       PTR TO NODE C   13*
+       any     LEAF            1458952489
+
+
+SHORTCUTS
+---------
+
+Shortcuts are metadata records that jump over a piece of keyspace.  A shortcut
+is a replacement for a series of single-occupancy nodes ascending through the
+levels.  Shortcuts exist to save memory and to speed up traversal.
+
+It is possible for the root of the tree to be a shortcut - say, for example,
+the tree contains at least 17 nodes all with key prefix '1111'.  The insertion
+algorithm will insert a shortcut to skip over the '1111' keyspace in a single
+bound and get to the fourth level where these actually become different.
+
+
+SPLITTING AND COLLAPSING NODES
+------------------------------
+
+Each node has a maximum capacity of 16 leaves and metadata pointers.  If the
+insertion algorithm finds that it is trying to insert a 17th object into a
+node, that node will be split such that at least two leaves that have a common
+key segment at that level end up in a separate node rooted on that slot for
+that common key segment.
+
+If the leaves in a full node and the leaf that is being inserted are
+sufficiently similar, then a shortcut will be inserted into the tree.
+
+When the number of objects in the subtree rooted at a node falls to 16 or
+fewer, then the subtree will be collapsed down to a single node - and this will
+ripple towards the root if possible.
+
+
+NON-RECURSIVE ITERATION
+-----------------------
+
+Each node and shortcut contains a back pointer to its parent and the number of
+the slot in that parent that points to it.  Non-recursive iteration uses these
+to proceed rootwards through the tree, going to the parent node, slot N + 1 to
+make sure progress is made without the need for a stack.
+
+The backpointers, however, make simultaneous alteration and iteration tricky.
+
+
+SIMULTANEOUS ALTERATION AND ITERATION
+-------------------------------------
+
+There are a number of cases to consider:
+
+ (1) Simple insert/replace.  This involves simply replacing a NULL or old
+     matching leaf pointer with the pointer to the new leaf after a barrier.
+     The metadata blocks don't change otherwise.  An old leaf won't be freed
+     until after the RCU grace period.
+
+ (2) Simple delete.  This involves just clearing an old matching leaf.  The
+     metadata blocks don't change otherwise.  The old leaf won't be freed until
+     after the RCU grace period.
+
+ (3) Insertion replacing part of a subtree that we haven't yet entered.  This
+     may involve replacement of part of that subtree - but that won't affect
+     the iteration as we won't have reached the pointer to it yet and the
+     ancestry blocks are not replaced (the layout of those does not change).
+
+ (4) Insertion replacing nodes that we're actively processing.  This isn't a
+     problem as we've passed the anchoring pointer and won't switch onto the
+     new layout until we follow the back pointers - at which point we've
+     already examined the leaves in the replaced node (we iterate over all the
+     leaves in a node before following any of its metadata pointers).
+
+     We might, however, re-see some leaves that have been split out into a new
+     branch that's in a slot further along than we were at.
+
+ (5) Insertion replacing nodes that we're processing a dependent branch of.
+     This won't affect us until we follow the back pointers.  Similar to (4).
+
+ (6) Deletion collapsing a branch under us.  This doesn't affect us because the
+     back pointers will get us back to the parent of the new node before we
+     could see the new node.  The entire collapsed subtree is thrown away
+     unchanged - and will still be rooted on the same slot, so we shouldn't
+     process it a second time as we'll go back to slot + 1.
+
+Note:
+
+ (*) Under some circumstances, we need to simultaneously change the parent
+     pointer and the parent slot pointer on a node (say, for example, we
+     inserted another node before it and moved it up a level).  We cannot do
+     this without locking against a read - so we have to replace that node too.
+
+     However, when we're changing a shortcut into a node this isn't a problem
+     as shortcuts only have one slot and so the parent slot number isn't used
+     when traversing backwards over one.  This means that it's okay to change
+     the slot number first - provided suitable barriers are used to make sure
+     the parent slot number is read after the back pointer.
+
+Obsolete blocks and leaves are freed up after an RCU grace period has passed,
+so as long as anyone doing walking or iteration holds the RCU read lock, the
+old superstructure should not go away on them.
index e1f343c7a34b7b10ea462a39b5e9a320ef463ba6..f69bcf5a6343bf314b5eef199999c5a676131e74 100644 (file)
@@ -28,7 +28,7 @@ The three cells in order are:
 dependent:
   - bit 7-0: peripheral identifier for the hardware handshaking interface. The
   identifier can be different for tx and rx.
-  - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 1 for ASAP.
+  - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 2 for ASAP.
 
 Example:
 
index ad6a73852f0880bf3625893d6ffc11646bdc8048..f1fb26eed0e9486f7226f58c6b14fce571c2f371 100644 (file)
@@ -15,6 +15,7 @@ adi,adt7461           +/-1C TDM Extended Temp Range I.C
 adt7461                        +/-1C TDM Extended Temp Range I.C
 at,24c08               i2c serial eeprom  (24cxx)
 atmel,24c02            i2c serial eeprom  (24cxx)
+atmel,at97sc3204t      i2c trusted platform module (TPM)
 catalyst,24c32         i2c serial eeprom
 dallas,ds1307          64 x 8, Serial, I2C Real-Time Clock
 dallas,ds1338          I2C RTC with 56-Byte NV RAM
@@ -44,6 +45,7 @@ mc,rv3029c2           Real Time Clock Module with I2C-Bus
 national,lm75          I2C TEMP SENSOR
 national,lm80          Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor
 national,lm92          ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface
+nuvoton,npct501                i2c trusted platform module (TPM)
 nxp,pca9556            Octal SMBus and I2C registered interface
 nxp,pca9557            8-bit I2C-bus and SMBus I/O port with reset
 nxp,pcf8563            Real-time clock/calendar
@@ -61,3 +63,4 @@ taos,tsl2550          Ambient Light Sensor with SMBUS/Two Wire Serial Interface
 ti,tsc2003             I2C Touch-Screen Controller
 ti,tmp102              Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface
 ti,tmp275              Digital Temperature Sensor
+winbond,wpct301                i2c trusted platform module (TPM)
index 2a4b4bce6110af59579c6f29e36fb8bcd71f7428..7fc1b010fa759a9cb69ab286b4ca032d295f0046 100644 (file)
@@ -1,33 +1,30 @@
-* Freescale 83xx DMA Controller
+* Freescale DMA Controllers
 
-Freescale PowerPC 83xx have on chip general purpose DMA controllers.
+** Freescale Elo DMA Controller
+   This is a little-endian 4-channel DMA controller, used in Freescale mpc83xx
+   series chips such as mpc8315, mpc8349, mpc8379 etc.
 
 Required properties:
 
-- compatible        : compatible list, contains 2 entries, first is
-                "fsl,CHIP-dma", where CHIP is the processor
-                (mpc8349, mpc8360, etc.) and the second is
-                "fsl,elo-dma"
-- reg               : <registers mapping for DMA general status reg>
-- ranges               : Should be defined as specified in 1) to describe the
-                 DMA controller channels.
+- compatible        : must include "fsl,elo-dma"
+- reg               : DMA General Status Register, i.e. DGSR which contains
+                      status for all the 4 DMA channels
+- ranges            : describes the mapping between the address space of the
+                      DMA channels and the address space of the DMA controller
 - cell-index        : controller index.  0 for controller @ 0x8100
-- interrupts        : <interrupt mapping for DMA IRQ>
+- interrupts        : interrupt specifier for DMA IRQ
 - interrupt-parent  : optional, if needed for interrupt mapping
 
-
 - DMA channel nodes:
-        - compatible        : compatible list, contains 2 entries, first is
-                        "fsl,CHIP-dma-channel", where CHIP is the processor
-                        (mpc8349, mpc8350, etc.) and the second is
-                        "fsl,elo-dma-channel". However, see note below.
-        - reg               : <registers mapping for channel>
-        - cell-index        : dma channel index starts at 0.
+        - compatible        : must include "fsl,elo-dma-channel"
+                              However, see note below.
+        - reg               : DMA channel specific registers
+        - cell-index        : DMA channel index starts at 0.
 
 Optional properties:
-        - interrupts        : <interrupt mapping for DMA channel IRQ>
-                         (on 83xx this is expected to be identical to
-                          the interrupts property of the parent node)
+        - interrupts        : interrupt specifier for DMA channel IRQ
+                              (on 83xx this is expected to be identical to
+                              the interrupts property of the parent node)
         - interrupt-parent  : optional, if needed for interrupt mapping
 
 Example:
@@ -70,30 +67,27 @@ Example:
                };
        };
 
-* Freescale 85xx/86xx DMA Controller
-
-Freescale PowerPC 85xx/86xx have on chip general purpose DMA controllers.
+** Freescale EloPlus DMA Controller
+   This is a 4-channel DMA controller with extended addresses and chaining,
+   mainly used in Freescale mpc85xx/86xx, Pxxx and BSC series chips, such as
+   mpc8540, mpc8641, p4080, bsc9131 etc.
 
 Required properties:
 
-- compatible        : compatible list, contains 2 entries, first is
-                "fsl,CHIP-dma", where CHIP is the processor
-                (mpc8540, mpc8540, etc.) and the second is
-                "fsl,eloplus-dma"
-- reg               : <registers mapping for DMA general status reg>
+- compatible        : must include "fsl,eloplus-dma"
+- reg               : DMA General Status Register, i.e. DGSR which contains
+                      status for all the 4 DMA channels
 - cell-index        : controller index.  0 for controller @ 0x21000,
                                          1 for controller @ 0xc000
-- ranges               : Should be defined as specified in 1) to describe the
-                 DMA controller channels.
+- ranges            : describes the mapping between the address space of the
+                      DMA channels and the address space of the DMA controller
 
 - DMA channel nodes:
-        - compatible        : compatible list, contains 2 entries, first is
-                        "fsl,CHIP-dma-channel", where CHIP is the processor
-                        (mpc8540, mpc8560, etc.) and the second is
-                        "fsl,eloplus-dma-channel". However, see note below.
-        - cell-index        : dma channel index starts at 0.
-        - reg               : <registers mapping for channel>
-        - interrupts        : <interrupt mapping for DMA channel IRQ>
+        - compatible        : must include "fsl,eloplus-dma-channel"
+                              However, see note below.
+        - cell-index        : DMA channel index starts at 0.
+        - reg               : DMA channel specific registers
+        - interrupts        : interrupt specifier for DMA channel IRQ
         - interrupt-parent  : optional, if needed for interrupt mapping
 
 Example:
@@ -134,6 +128,76 @@ Example:
                };
        };
 
+** Freescale Elo3 DMA Controller
+   DMA controller which has same function as EloPlus except that Elo3 has 8
+   channels while EloPlus has only 4, it is used in Freescale Txxx and Bxxx
+   series chips, such as t1040, t4240, b4860.
+
+Required properties:
+
+- compatible        : must include "fsl,elo3-dma"
+- reg               : contains two entries for DMA General Status Registers,
+                      i.e. DGSR0 which includes status for channel 1~4, and
+                      DGSR1 for channel 5~8
+- ranges            : describes the mapping between the address space of the
+                      DMA channels and the address space of the DMA controller
+
+- DMA channel nodes:
+        - compatible        : must include "fsl,eloplus-dma-channel"
+        - reg               : DMA channel specific registers
+        - interrupts        : interrupt specifier for DMA channel IRQ
+        - interrupt-parent  : optional, if needed for interrupt mapping
+
+Example:
+dma@100300 {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "fsl,elo3-dma";
+       reg = <0x100300 0x4>,
+             <0x100600 0x4>;
+       ranges = <0x0 0x100100 0x500>;
+       dma-channel@0 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x0 0x80>;
+               interrupts = <28 2 0 0>;
+       };
+       dma-channel@80 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x80 0x80>;
+               interrupts = <29 2 0 0>;
+       };
+       dma-channel@100 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x100 0x80>;
+               interrupts = <30 2 0 0>;
+       };
+       dma-channel@180 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x180 0x80>;
+               interrupts = <31 2 0 0>;
+       };
+       dma-channel@300 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x300 0x80>;
+               interrupts = <76 2 0 0>;
+       };
+       dma-channel@380 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x380 0x80>;
+               interrupts = <77 2 0 0>;
+       };
+       dma-channel@400 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x400 0x80>;
+               interrupts = <78 2 0 0>;
+       };
+       dma-channel@480 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x480 0x80>;
+               interrupts = <79 2 0 0>;
+       };
+};
+
 Note on DMA channel compatible properties: The compatible property must say
 "fsl,elo-dma-channel" or "fsl,eloplus-dma-channel" to be used by the Elo DMA
 driver (fsldma).  Any DMA channel used by fsldma cannot be used by another
index a2b5663eae266d2dcae8fcf9400ef9eb16b24709..dd77a81bdb80b82b5c732ceb33ba2ef30b1d9eea 100644 (file)
@@ -15,39 +15,48 @@ be built as module or inside kernel. Let's consider those cases.
 
        Part 2 - When dmatest is built as a module...
 
-After mounting debugfs and loading the module, the /sys/kernel/debug/dmatest
-folder with nodes will be created. There are two important files located. First
-is the 'run' node that controls run and stop phases of the test, and the second
-one, 'results', is used to get the test case results.
-
-Note that in this case test will not run on load automatically.
-
 Example of usage:
+       % modprobe dmatest channel=dma0chan0 timeout=2000 iterations=1 run=1
+
+...or:
+       % modprobe dmatest
        % echo dma0chan0 > /sys/module/dmatest/parameters/channel
        % echo 2000 > /sys/module/dmatest/parameters/timeout
        % echo 1 > /sys/module/dmatest/parameters/iterations
-       % echo 1 > /sys/kernel/debug/dmatest/run
+       % echo 1 > /sys/module/dmatest/parameters/run
+
+...or on the kernel command line:
+
+       dmatest.channel=dma0chan0 dmatest.timeout=2000 dmatest.iterations=1 dmatest.run=1
 
 Hint: available channel list could be extracted by running the following
 command:
        % ls -1 /sys/class/dma/
 
-After a while you will start to get messages about current status or error like
-in the original code.
+Once started a message like "dmatest: Started 1 threads using dma0chan0" is
+emitted.  After that only test failure messages are reported until the test
+stops.
 
 Note that running a new test will not stop any in progress test.
 
-The following command should return actual state of the test.
-       % cat /sys/kernel/debug/dmatest/run
-
-To wait for test done the user may perform a busy loop that checks the state.
-
-       % while [ $(cat /sys/kernel/debug/dmatest/run) = "Y" ]
-       > do
-       >       echo -n "."
-       >       sleep 1
-       > done
-       > echo
+The following command returns the state of the test.
+       % cat /sys/module/dmatest/parameters/run
+
+To wait for test completion userspace can poll 'run' until it is false, or use
+the wait parameter.  Specifying 'wait=1' when loading the module causes module
+initialization to pause until a test run has completed, while reading
+/sys/module/dmatest/parameters/wait waits for any running test to complete
+before returning.  For example, the following scripts wait for 42 tests
+to complete before exiting.  Note that if 'iterations' is set to 'infinite' then
+waiting is disabled.
+
+Example:
+       % modprobe dmatest run=1 iterations=42 wait=1
+       % modprobe -r dmatest
+...or:
+       % modprobe dmatest run=1 iterations=42
+       % cat /sys/module/dmatest/parameters/wait
+       % modprobe -r dmatest
 
        Part 3 - When built-in in the kernel...
 
@@ -62,21 +71,22 @@ case. You always could check them at run-time by running
 
        Part 4 - Gathering the test results
 
-The module provides a storage for the test results in the memory. The gathered
-data could be used after test is done.
+Test results are printed to the kernel log buffer with the format:
 
-The special file 'results' in the debugfs represents gathered data of the in
-progress test. The messages collected are printed to the kernel log as well.
+"dmatest: result <channel>: <test id>: '<error msg>' with src_off=<val> dst_off=<val> len=<val> (<err code>)"
 
 Example of output:
-       % cat /sys/kernel/debug/dmatest/results
-       dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
+       % dmesg | tail -n 1
+       dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
 
 The message format is unified across the different types of errors. A number in
 the parens represents additional information, e.g. error code, error counter,
-or status.
+or status.  A test thread also emits a summary line at completion listing the
+number of tests executed, number that failed, and a result code.
 
-Comparison between buffers is stored to the dedicated structure.
+Example:
+       % dmesg | tail -n 1
+       dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0)
 
-Note that the verify result is now accessible only via file 'results' in the
-debugfs.
+The details of a data miscompare error are also emitted, but do not follow the
+above format.
index 9dae59407437916759a73e00166b6ba04b2c4e52..5dd282dda55c5eca0fe50b6b1cfc0116d3cc7432 100644 (file)
@@ -70,6 +70,12 @@ Unless otherwise specified, all options default to off.
 
        See comments at the top of fs/btrfs/check-integrity.c for more info.
 
+  commit=<seconds>
+       Set the interval of periodic commit, 30 seconds by default. Higher
+       values defer data being synced to permanent storage with obvious
+       consequences when the system crashes. The upper bound is not forced,
+       but a warning is printed if it's more than 300 seconds (5 minutes).
+
   compress
   compress=<type>
   compress-force
@@ -154,7 +160,11 @@ Unless otherwise specified, all options default to off.
        Currently this scans a list of several previous tree roots and tries to 
        use the first readable.
 
- skip_balance
+  rescan_uuid_tree
+       Force check and rebuild procedure of the UUID tree. This should not
+       normally be needed.
+
+  skip_balance
        Skip automatic resume of interrupted balance operation after mount.
        May be resumed with "btrfs balance resume."
 
@@ -234,24 +244,14 @@ available from the git repository at the following location:
 
 These include the following tools:
 
-mkfs.btrfs: create a filesystem
-
-btrfsctl: control program to create snapshots and subvolumes:
+* mkfs.btrfs: create a filesystem
 
-       mount /dev/sda2 /mnt
-       btrfsctl -s new_subvol_name /mnt
-       btrfsctl -s snapshot_of_default /mnt/default
-       btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
-       btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
-       ls /mnt
-       default snapshot_of_a_snapshot snapshot_of_new_subvol
-       new_subvol_name snapshot_of_default
+* btrfs: a single tool to manage the filesystems, refer to the manpage for more details
 
-       Snapshots and subvolumes cannot be deleted right now, but you can
-       rm -rf all the files and directories inside them.
+* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem
 
-btrfsck: do a limited check of the FS extent trees.
+Other tools for specific tasks:
 
-btrfs-debug-tree: print all of the FS metadata in text form.  Example:
+* btrfs-convert: in-place conversion from ext2/3/4 filesystems
 
-       btrfs-debug-tree /dev/sda2 >& big_output_file
+* btrfs-image: dump filesystem metadata for debugging
index 9ca3e74a10e128b4103d00dc8d06c12e6127d4c8..50680a59a2ff9a913e449a1d71ced9fab3004fe4 100644 (file)
@@ -1190,15 +1190,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        owned by uid=0.
 
        ima_hash=       [IMA]
-                       Format: { "sha1" | "md5" }
+                       Format: { md5 | sha1 | rmd160 | sha256 | sha384
+                                  | sha512 | ... }
                        default: "sha1"
 
+                       The list of supported hash algorithms is defined
+                       in crypto/hash_info.h.
+
        ima_tcb         [IMA]
                        Load a policy which meets the needs of the Trusted
                        Computing Base.  This means IMA will measure all
                        programs exec'd, files mmap'd for exec, and all files
                        opened for read by uid=0.
 
+       ima_template=   [IMA]
+                       Select one of the defined IMA measurement template formats.
+                       Formats: { "ima" | "ima-ng" }
+                       Default: "ima-ng"
+
        init=           [KNL]
                        Format: <full_path>
                        Run specified binary instead of /sbin/init as init
index 0f54333b0ff2990ce090f88087e17417a894b46c..b6ce00b2be9ae9682c6821bfb70175e42cc75e57 100644 (file)
@@ -547,13 +547,11 @@ helper functions described in Section 4.  In that case, pm_runtime_resume()
 should be used.  Of course, for this purpose the device's runtime PM has to be
 enabled earlier by calling pm_runtime_enable().
 
-If the device bus type's or driver's ->probe() callback runs
-pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts,
-they will fail returning -EAGAIN, because the device's usage counter is
-incremented by the driver core before executing ->probe().  Still, it may be
-desirable to suspend the device as soon as ->probe() has finished, so the driver
-core uses pm_runtime_put_sync() to invoke the subsystem-level idle callback for
-the device at that time.
+It may be desirable to suspend the device once ->probe() has finished.
+Therefore the driver core uses the asynchronous pm_request_idle() to submit a
+request to execute the subsystem-level idle callback for the device at that
+time.  A driver that makes use of the runtime autosuspend feature, may want to
+update the last busy mark before returning from ->probe().
 
 Moreover, the driver core prevents runtime PM callbacks from racing with the bus
 notifier callback in __device_release_driver(), which is necessary, because the
@@ -656,7 +654,7 @@ out the following operations:
     __pm_runtime_disable() with 'false' as the second argument for every device
     right before executing the subsystem-level .suspend_late() callback for it.
 
-  * During system resume it calls pm_runtime_enable() and pm_runtime_put_sync()
+  * During system resume it calls pm_runtime_enable() and pm_runtime_put()
     for every device right after executing the subsystem-level .resume_early()
     callback and right after executing the subsystem-level .resume() callback
     for it, respectively.
index 414235c1fcfcdd3f4a8ca191df7edf7cf21049ad..45c82fd3e9d39bacaa1febcabba942446c2afda4 100644 (file)
@@ -22,3 +22,5 @@ keys.txt
        - description of the kernel key retention service.
 tomoyo.txt
        - documentation on the TOMOYO Linux Security Module.
+IMA-templates.txt
+       - documentation on the template management mechanism for IMA.
diff --git a/Documentation/security/IMA-templates.txt b/Documentation/security/IMA-templates.txt
new file mode 100644 (file)
index 0000000..a777e5f
--- /dev/null
@@ -0,0 +1,87 @@
+                       IMA Template Management Mechanism
+
+
+==== INTRODUCTION ====
+
+The original 'ima' template is fixed length, containing the filedata hash
+and pathname. The filedata hash is limited to 20 bytes (md5/sha1).
+The pathname is a null terminated string, limited to 255 characters.
+To overcome these limitations and to add additional file metadata, it is
+necessary to extend the current version of IMA by defining additional
+templates. For example, information that could be possibly reported are
+the inode UID/GID or the LSM labels both of the inode and of the process
+that is accessing it.
+
+However, the main problem to introduce this feature is that, each time
+a new template is defined, the functions that generate and display
+the measurements list would include the code for handling a new format
+and, thus, would significantly grow over the time.
+
+The proposed solution solves this problem by separating the template
+management from the remaining IMA code. The core of this solution is the
+definition of two new data structures: a template descriptor, to determine
+which information should be included in the measurement list; a template
+field, to generate and display data of a given type.
+
+Managing templates with these structures is very simple. To support
+a new data type, developers define the field identifier and implement
+two functions, init() and show(), respectively to generate and display
+measurement entries. Defining a new template descriptor requires
+specifying the template format, a string of field identifiers separated
+by the '|' character. While in the current implementation it is possible
+to define new template descriptors only by adding their definition in the
+template specific code (ima_template.c), in a future version it will be
+possible to register a new template on a running kernel by supplying to IMA
+the desired format string. In this version, IMA initializes at boot time
+all defined template descriptors by translating the format into an array
+of template fields structures taken from the set of the supported ones.
+
+After the initialization step, IMA will call ima_alloc_init_template()
+(new function defined within the patches for the new template management
+mechanism) to generate a new measurement entry by using the template
+descriptor chosen through the kernel configuration or through the newly
+introduced 'ima_template=' kernel command line parameter. It is during this
+phase that the advantages of the new architecture are clearly shown:
+the latter function will not contain specific code to handle a given template
+but, instead, it simply calls the init() method of the template fields
+associated with the chosen template descriptor and stores the result (pointer
+to allocated data and data length) in the measurement entry structure.
+
+The same mechanism is employed to display measurements entries.
+The functions ima[_ascii]_measurements_show() retrieve, for each entry,
+the template descriptor used to produce that entry and call the show()
+method for each item of the array of template fields structures.
+
+
+
+==== SUPPORTED TEMPLATE FIELDS AND DESCRIPTORS ====
+
+In the following, there is the list of supported template fields
+('<identifier>': description), that can be used to define new template
+descriptors by adding their identifier to the format string
+(support for more data types will be added later):
+
+ - 'd': the digest of the event (i.e. the digest of a measured file),
+        calculated with the SHA1 or MD5 hash algorithm;
+ - 'n': the name of the event (i.e. the file name), with size up to 255 bytes;
+ - 'd-ng': the digest of the event, calculated with an arbitrary hash
+           algorithm (field format: [<hash algo>:]digest, where the digest
+           prefix is shown only if the hash algorithm is not SHA1 or MD5);
+ - 'n-ng': the name of the event, without size limitations.
+
+
+Below, there is the list of defined template descriptors:
+ - "ima": its format is 'd|n';
+ - "ima-ng" (default): its format is 'd-ng|n-ng'.
+
+
+
+==== USE ====
+
+To specify the template descriptor to be used to generate measurement entries,
+currently the following methods are supported:
+
+ - select a template descriptor among those supported in the kernel
+   configuration ('ima-ng' is the default choice);
+ - specify a template descriptor name from the kernel command line through
+   the 'ima_template=' parameter.
index 7b4145d00452f259fe79eff4f400ecfc949a6fd1..a4c33f1a7c6de5dc2207a21bab00846266668f90 100644 (file)
@@ -865,15 +865,14 @@ encountered:
      calling processes has a searchable link to the key from one of its
      keyrings. There are three functions for dealing with these:
 
-       key_ref_t make_key_ref(const struct key *key,
-                              unsigned long possession);
+       key_ref_t make_key_ref(const struct key *key, bool possession);
 
        struct key *key_ref_to_ptr(const key_ref_t key_ref);
 
-       unsigned long is_key_possessed(const key_ref_t key_ref);
+       bool is_key_possessed(const key_ref_t key_ref);
 
      The first function constructs a key reference from a key pointer and
-     possession information (which must be 0 or 1 and not any other value).
+     possession information (which must be true or false).
 
      The second function retrieves the key pointer from a reference and the
      third retrieves the possession flag.
@@ -961,14 +960,17 @@ payload contents" for more information.
     the argument will not be parsed.
 
 
-(*) Extra references can be made to a key by calling the following function:
+(*) Extra references can be made to a key by calling one of the following
+    functions:
 
+       struct key *__key_get(struct key *key);
        struct key *key_get(struct key *key);
 
-    These need to be disposed of by calling key_put() when they've been
-    finished with. The key pointer passed in will be returned. If the pointer
-    is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and
-    no increment will take place.
+    Keys so referenced will need to be disposed of by calling key_put() when
+    they've been finished with.  The key pointer passed in will be returned.
+
+    In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
+    then the key will not be dereferenced and no increment will take place.
 
 
 (*) A key's serial number can be obtained by calling:
index 7521d367f21d689a70faa048b2a57b781fbb143e..6dea4fd5c96100d75f9152ffd8ac9be5909acf5f 100644 (file)
@@ -63,9 +63,9 @@ levels.
 PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table
 allocation and pgtable_pmd_page_dtor() on freeing.
 
-Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but
-make sure you cover all PMD table allocation / freeing paths: i.e X86_PAE
-preallocate few PMDs on pgd_alloc().
+Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and
+pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing
+paths: e.g. X86_PAE preallocates a few PMDs on pgd_alloc().
 
 With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
 
index 63f30484932b1cddbe5f9c5bd05ca6473d612e0b..8285ed4676b6388502be84ddde71d1e201827ce8 100644 (file)
@@ -4065,6 +4065,7 @@ F:        arch/x86/include/uapi/asm/hyperv.h
 F:     arch/x86/kernel/cpu/mshyperv.c
 F:     drivers/hid/hid-hyperv.c
 F:     drivers/hv/
+F:     drivers/input/serio/hyperv-keyboard.c
 F:     drivers/net/hyperv/
 F:     drivers/scsi/storvsc_drv.c
 F:     drivers/video/hyperv_fb.c
@@ -7515,9 +7516,10 @@ SELINUX SECURITY MODULE
 M:     Stephen Smalley <sds@tycho.nsa.gov>
 M:     James Morris <james.l.morris@oracle.com>
 M:     Eric Paris <eparis@parisplace.org>
+M:     Paul Moore <paul@paul-moore.com>
 L:     selinux@tycho.nsa.gov (subscribers-only, general discussion)
 W:     http://selinuxproject.org
-T:     git git://git.infradead.org/users/eparis/selinux.git
+T:     git git://git.infradead.org/users/pcmoore/selinux
 S:     Supported
 F:     include/linux/selinux*
 F:     security/selinux/
@@ -8664,6 +8666,7 @@ F:        drivers/media/usb/tm6000/
 TPM DEVICE DRIVER
 M:     Leonidas Da Silva Barbosa <leosilva@linux.vnet.ibm.com>
 M:     Ashley Lai <ashley@ashleylai.com>
+M:     Peter Huewe <peterhuewe@gmx.de>
 M:     Rajiv Andrade <mail@srajiv.net>
 W:     http://tpmdd.sourceforge.net
 M:     Marcel Selhorst <tpmdd@selhorst.net>
@@ -9522,8 +9525,8 @@ F:        drivers/xen/*swiotlb*
 
 XFS FILESYSTEM
 P:     Silicon Graphics Inc
+M:     Dave Chinner <dchinner@fromorbit.com>
 M:     Ben Myers <bpm@sgi.com>
-M:     Alex Elder <elder@kernel.org>
 M:     xfs@oss.sgi.com
 L:     xfs@oss.sgi.com
 W:     http://oss.sgi.com/projects/xfs
index 135c674eaf9ec3383b309e7869ea261841a82fe4..d39dc9b95a2c6810ae920ed9d5dbdadbcbb37de5 100644 (file)
@@ -16,8 +16,8 @@ config ALPHA
        select ARCH_WANT_IPC_PARSE_VERSION
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+       select GENERIC_CLOCKEVENTS
        select GENERIC_SMP_IDLE_THREAD
-       select GENERIC_CMOS_UPDATE
        select GENERIC_STRNCPY_FROM_USER
        select GENERIC_STRNLEN_USER
        select HAVE_MOD_ARCH_SPECIFIC
@@ -488,6 +488,20 @@ config VGA_HOSE
          which always have multiple hoses, and whose consoles support it.
 
 
+config ALPHA_QEMU
+       bool "Run under QEMU emulation"
+       depends on !ALPHA_GENERIC
+       ---help---
+         Assume the presence of special features supported by QEMU PALcode
+         that reduce the overhead of system emulation.
+
+         Generic kernels will auto-detect QEMU.  But when building a
+         system-specific kernel, the assumption is that we want to
+         eliminate as many runtime tests as possible.
+
+         If unsure, say N.
+
+
 config ALPHA_SRM
        bool "Use SRM as bootloader" if ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_NAUTILUS || ALPHA_NONAME
        depends on TTY
@@ -572,6 +586,30 @@ config NUMA
          Access).  This option is for configuring high-end multiprocessor
          server machines.  If in doubt, say N.
 
+config ALPHA_WTINT
+       bool "Use WTINT" if ALPHA_SRM || ALPHA_GENERIC
+       default y if ALPHA_QEMU
+       default n if ALPHA_EV5 || ALPHA_EV56 || (ALPHA_EV4 && !ALPHA_LCA)
+       default n if !ALPHA_SRM && !ALPHA_GENERIC
+       default y if SMP
+       ---help---
+         The Wait for Interrupt (WTINT) PALcall attempts to place the CPU
+         to sleep until the next interrupt.  This may reduce the power
+         consumed, and the heat produced by the computer.  However, it has
+         the side effect of making the cycle counter unreliable as a timing
+         device across the sleep.
+
+         For emulation under QEMU, definitely say Y here, as we have other
+         mechanisms for measuring time than the cycle counter.
+
+         For EV4 (but not LCA), EV5 and EV56 systems, or for systems running
+         MILO, sleep mode is not supported so you might as well say N here.
+
+         For SMP systems we cannot use the cycle counter for timing anyway,
+         so you might as well say Y here.
+
+         If unsure, say N.
+
 config NODES_SHIFT
        int
        default "7"
@@ -613,9 +651,41 @@ config VERBOSE_MCHECK_ON
 
          Take the default (1) unless you want more control or more info.
 
+choice
+       prompt "Timer interrupt frequency (HZ)?"
+       default HZ_128 if ALPHA_QEMU
+       default HZ_1200 if ALPHA_RAWHIDE
+       default HZ_1024
+       ---help---
+         The frequency at which timer interrupts occur.  A high frequency
+         minimizes latency, whereas a low frequency minimizes overhead of
+         process accounting.  The latter effect is especially significant
+         when being run under QEMU.
+
+         Note that some Alpha hardware cannot change the interrupt frequency
+         of the timer.  If unsure, say 1024 (or 1200 for Rawhide).
+
+       config HZ_32
+               bool "32 Hz"
+       config HZ_64
+               bool "64 Hz"
+       config HZ_128
+               bool "128 Hz"
+       config HZ_256
+               bool "256 Hz"
+       config HZ_1024
+               bool "1024 Hz"
+       config HZ_1200
+               bool "1200 Hz"
+endchoice
+
 config HZ
-       int
-       default 1200 if ALPHA_RAWHIDE
+       int 
+       default 32 if HZ_32
+       default 64 if HZ_64
+       default 128 if HZ_128
+       default 256 if HZ_256
+       default 1200 if HZ_1200
        default 1024
 
 source "drivers/pci/Kconfig"
index 72dbf235927054145d8c18c14968d452efd2c494..75cb3641ed2f0b507c1bcd6a86cb2d8da2966044 100644 (file)
@@ -33,6 +33,7 @@ struct alpha_machine_vector
 
        int nr_irqs;
        int rtc_port;
+       int rtc_boot_cpu_only;
        unsigned int max_asn;
        unsigned long max_isa_dma_address;
        unsigned long irq_probe_mask;
@@ -95,9 +96,6 @@ struct alpha_machine_vector
 
        struct _alpha_agp_info *(*agp_info)(void);
 
-       unsigned int (*rtc_get_time)(struct rtc_time *);
-       int (*rtc_set_time)(struct rtc_time *);
-
        const char *vector_name;
 
        /* NUMA information */
@@ -126,13 +124,19 @@ extern struct alpha_machine_vector alpha_mv;
 
 #ifdef CONFIG_ALPHA_GENERIC
 extern int alpha_using_srm;
+extern int alpha_using_qemu;
 #else
-#ifdef CONFIG_ALPHA_SRM
-#define alpha_using_srm 1
-#else
-#define alpha_using_srm 0
-#endif
+# ifdef CONFIG_ALPHA_SRM
+#  define alpha_using_srm 1
+# else
+#  define alpha_using_srm 0
+# endif
+# ifdef CONFIG_ALPHA_QEMU
+#  define alpha_using_qemu 1
+# else
+#  define alpha_using_qemu 0
+# endif
 #endif /* GENERIC */
 
-#endif
+#endif /* __KERNEL__ */
 #endif /* __ALPHA_MACHVEC_H */
index 6fcd2b5b08f0d5ac4462ba6616a66519c2819d7d..5422a47646fc20add2047b57d41a91593905e242 100644 (file)
@@ -89,6 +89,7 @@ __CALL_PAL_W1(wrmces, unsigned long);
 __CALL_PAL_RW2(wrperfmon, unsigned long, unsigned long, unsigned long);
 __CALL_PAL_W1(wrusp, unsigned long);
 __CALL_PAL_W1(wrvptptr, unsigned long);
+__CALL_PAL_RW1(wtint, unsigned long, unsigned long);
 
 /*
  * TB routines..
@@ -111,5 +112,75 @@ __CALL_PAL_W1(wrvptptr, unsigned long);
 #define tbiap()                __tbi(-1, /* no second argument */)
 #define tbia()         __tbi(-2, /* no second argument */)
 
+/*
+ * QEMU Cserv routines..
+ */
+
+static inline unsigned long
+qemu_get_walltime(void)
+{
+       register unsigned long v0 __asm__("$0");
+       register unsigned long a0 __asm__("$16") = 3;
+
+       asm("call_pal %2 # cserve get_time"
+           : "=r"(v0), "+r"(a0)
+           : "i"(PAL_cserve)
+           : "$17", "$18", "$19", "$20", "$21");
+
+       return v0;
+}
+
+static inline unsigned long
+qemu_get_alarm(void)
+{
+       register unsigned long v0 __asm__("$0");
+       register unsigned long a0 __asm__("$16") = 4;
+
+       asm("call_pal %2 # cserve get_alarm"
+           : "=r"(v0), "+r"(a0)
+           : "i"(PAL_cserve)
+           : "$17", "$18", "$19", "$20", "$21");
+
+       return v0;
+}
+
+static inline void
+qemu_set_alarm_rel(unsigned long expire)
+{
+       register unsigned long a0 __asm__("$16") = 5;
+       register unsigned long a1 __asm__("$17") = expire;
+
+       asm volatile("call_pal %2 # cserve set_alarm_rel"
+                    : "+r"(a0), "+r"(a1)
+                    : "i"(PAL_cserve)
+                    : "$0", "$18", "$19", "$20", "$21");
+}
+
+static inline void
+qemu_set_alarm_abs(unsigned long expire)
+{
+       register unsigned long a0 __asm__("$16") = 6;
+       register unsigned long a1 __asm__("$17") = expire;
+
+       asm volatile("call_pal %2 # cserve set_alarm_abs"
+                    : "+r"(a0), "+r"(a1)
+                    : "i"(PAL_cserve)
+                    : "$0", "$18", "$19", "$20", "$21");
+}
+
+static inline unsigned long
+qemu_get_vmtime(void)
+{
+       register unsigned long v0 __asm__("$0");
+       register unsigned long a0 __asm__("$16") = 7;
+
+       asm("call_pal %2 # cserve get_time"
+           : "=r"(v0), "+r"(a0)
+           : "i"(PAL_cserve)
+           : "$17", "$18", "$19", "$20", "$21");
+
+       return v0;
+}
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ALPHA_PAL_H */
index d70408d36677c86d0fbd8ce90530724e54d15a57..f71c3b0ed3606c7fc96ab6ee45b66ba324dc30ee 100644 (file)
@@ -1,12 +1 @@
-#ifndef _ALPHA_RTC_H
-#define _ALPHA_RTC_H
-
-#if defined(CONFIG_ALPHA_MARVEL) && defined(CONFIG_SMP) \
- || defined(CONFIG_ALPHA_GENERIC)
-# define get_rtc_time          alpha_mv.rtc_get_time
-# define set_rtc_time          alpha_mv.rtc_set_time
-#endif
-
 #include <asm-generic/rtc.h>
-
-#endif
index b02b8a282940fd3e64bde0d757fdb4f6a5ba94c7..c2911f5917041abd49dea5f14855ac9691d1aec0 100644 (file)
@@ -22,15 +22,27 @@ extern void * __memcpy(void *, const void *, size_t);
 
 #define __HAVE_ARCH_MEMSET
 extern void * __constant_c_memset(void *, unsigned long, size_t);
+extern void * ___memset(void *, int, size_t);
 extern void * __memset(void *, int, size_t);
 extern void * memset(void *, int, size_t);
 
-#define memset(s, c, n)                                                            \
-(__builtin_constant_p(c)                                                   \
- ? (__builtin_constant_p(n) && (c) == 0                                            \
-    ? __builtin_memset((s),0,(n))                                          \
-    : __constant_c_memset((s),0x0101010101010101UL*(unsigned char)(c),(n))) \
- : __memset((s),(c),(n)))
+/* For gcc 3.x, we cannot have the inline function named "memset" because
+   the __builtin_memset will attempt to resolve to the inline as well,
+   leading to a "sorry" about unimplemented recursive inlining.  */
+extern inline void *__memset(void *s, int c, size_t n)
+{
+       if (__builtin_constant_p(c)) {
+               if (__builtin_constant_p(n)) {
+                       return __builtin_memset(s, c, n);
+               } else {
+                       unsigned long c8 = (c & 0xff) * 0x0101010101010101UL;
+                       return __constant_c_memset(s, c8, n);
+               }
+       }
+       return ___memset(s, c, n);
+}
+
+#define memset __memset
 
 #define __HAVE_ARCH_STRCPY
 extern char * strcpy(char *,const char *);
index 3c0ce08e5f592d9b779c10a441527cd9061e9c07..dfc8140b908821d46a3d82f30baaa121c1bf64ef 100644 (file)
@@ -46,6 +46,7 @@
 #define PAL_rdusp      58
 #define PAL_whami      60
 #define PAL_retsys     61
+#define PAL_wtint      62
 #define PAL_rti                63
 
 
index 84ec46b38f7dc1c39043f8671bc60b3db1a62b9d..0d54650e78fc6b622272d88ac6406273f8e9a282 100644 (file)
@@ -16,6 +16,7 @@ obj-$(CONFIG_PCI)     += pci.o pci_iommu.o pci-sysfs.o
 obj-$(CONFIG_SRM_ENV)  += srm_env.o
 obj-$(CONFIG_MODULES)  += module.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_RTC_DRV_ALPHA) += rtc.o
 
 ifdef CONFIG_ALPHA_GENERIC
 
index 89566b346c0f802bd5bfe0c817bd2fee9bd39d3e..f4c7ab6f43b0dc167bce50125c812e79ebb2d067 100644 (file)
@@ -40,6 +40,7 @@ EXPORT_SYMBOL(strrchr);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(__memcpy);
 EXPORT_SYMBOL(__memset);
+EXPORT_SYMBOL(___memset);
 EXPORT_SYMBOL(__memsetw);
 EXPORT_SYMBOL(__constant_c_memset);
 EXPORT_SYMBOL(copy_page);
index 28e4429596f3f208c1b868887112bad4faa33987..1c8625cb0e253fbc57e1b341fe48867bd7369e96 100644 (file)
@@ -66,21 +66,7 @@ do_entInt(unsigned long type, unsigned long vector,
                break;
        case 1:
                old_regs = set_irq_regs(regs);
-#ifdef CONFIG_SMP
-         {
-               long cpu;
-
-               smp_percpu_timer_interrupt(regs);
-               cpu = smp_processor_id();
-               if (cpu != boot_cpuid) {
-                       kstat_incr_irqs_this_cpu(RTC_IRQ, irq_to_desc(RTC_IRQ));
-               } else {
-                       handle_irq(RTC_IRQ);
-               }
-         }
-#else
                handle_irq(RTC_IRQ);
-#endif
                set_irq_regs(old_regs);
                return;
        case 2:
@@ -228,7 +214,7 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr,
  */
 
 struct irqaction timer_irqaction = {
-       .handler        = timer_interrupt,
+       .handler        = rtc_timer_interrupt,
        .name           = "timer",
 };
 
index 7fa62488bd16791f77b0218451629d749c2e5351..f54bdf658cd0b9ff6b72f71fc40504bd8dff252e 100644 (file)
 #define CAT1(x,y)  x##y
 #define CAT(x,y)   CAT1(x,y)
 
-#define DO_DEFAULT_RTC \
-       .rtc_port = 0x70, \
-       .rtc_get_time = common_get_rtc_time, \
-       .rtc_set_time = common_set_rtc_time
+#define DO_DEFAULT_RTC                 .rtc_port = 0x70
 
 #define DO_EV4_MMU                                                     \
        .max_asn =                      EV4_MAX_ASN,                    \
index d821b17047e0abbe54dd82871d3cd9e324d28df8..c52e7f0ee5f6084bd2c8068a552659f97edd9a8d 100644 (file)
@@ -83,6 +83,8 @@ struct alpha_pmu_t {
        long pmc_left[3];
         /* Subroutine for allocation of PMCs.  Enforces constraints. */
        int (*check_constraints)(struct perf_event **, unsigned long *, int);
+       /* Subroutine for checking validity of a raw event for this PMU. */
+       int (*raw_event_valid)(u64 config);
 };
 
 /*
@@ -203,6 +205,12 @@ success:
 }
 
 
+static int ev67_raw_event_valid(u64 config)
+{
+       return config >= EV67_CYCLES && config < EV67_LAST_ET;
+};
+
+
 static const struct alpha_pmu_t ev67_pmu = {
        .event_map = ev67_perfmon_event_map,
        .max_events = ARRAY_SIZE(ev67_perfmon_event_map),
@@ -211,7 +219,8 @@ static const struct alpha_pmu_t ev67_pmu = {
        .pmc_count_mask = {EV67_PCTR_0_COUNT_MASK,  EV67_PCTR_1_COUNT_MASK,  0},
        .pmc_max_period = {(1UL<<20) - 1, (1UL<<20) - 1, 0},
        .pmc_left = {16, 4, 0},
-       .check_constraints = ev67_check_constraints
+       .check_constraints = ev67_check_constraints,
+       .raw_event_valid = ev67_raw_event_valid,
 };
 
 
@@ -609,7 +618,9 @@ static int __hw_perf_event_init(struct perf_event *event)
        } else if (attr->type == PERF_TYPE_HW_CACHE) {
                return -EOPNOTSUPP;
        } else if (attr->type == PERF_TYPE_RAW) {
-               ev = attr->config & 0xff;
+               if (!alpha_pmu->raw_event_valid(attr->config))
+                       return -EINVAL;
+               ev = attr->config;
        } else {
                return -EOPNOTSUPP;
        }
index f2360a74e5d5544983160d951c46bddb98819e0e..1941a07b5811f925aed82e853aab4efb081f74ca 100644 (file)
 void (*pm_power_off)(void) = machine_power_off;
 EXPORT_SYMBOL(pm_power_off);
 
+#ifdef CONFIG_ALPHA_WTINT
+/*
+ * Sleep the CPU.
+ * EV6, LCA45 and QEMU know how to power down, skipping N timer interrupts.
+ */
+void arch_cpu_idle(void)
+{
+       wtint(0);
+       local_irq_enable();
+}
+
+void arch_cpu_idle_dead(void)
+{
+       wtint(INT_MAX);
+}
+#endif /* ALPHA_WTINT */
+
 struct halt_info {
        int mode;
        char *restart_cmd;
index d3e52d3fd59299771ed3e90d0fc13f0aca9faa6f..da2d6ec9c37065ca48265597cc6cd8b91f6e2796 100644 (file)
@@ -135,17 +135,15 @@ extern void unregister_srm_console(void);
 /* smp.c */
 extern void setup_smp(void);
 extern void handle_ipi(struct pt_regs *);
-extern void smp_percpu_timer_interrupt(struct pt_regs *);
 
 /* bios32.c */
 /* extern void reset_for_srm(void); */
 
 /* time.c */
-extern irqreturn_t timer_interrupt(int irq, void *dev);
+extern irqreturn_t rtc_timer_interrupt(int irq, void *dev);
+extern void init_clockevent(void);
 extern void common_init_rtc(void);
 extern unsigned long est_cycle_freq;
-extern unsigned int common_get_rtc_time(struct rtc_time *time);
-extern int common_set_rtc_time(struct rtc_time *time);
 
 /* smc37c93x.c */
 extern void SMC93x_Init(void);
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
new file mode 100644 (file)
index 0000000..c8d284d
--- /dev/null
@@ -0,0 +1,323 @@
+/*
+ *  linux/arch/alpha/kernel/rtc.c
+ *
+ *  Copyright (C) 1991, 1992, 1995, 1999, 2000  Linus Torvalds
+ *
+ * This file contains date handling.
+ */
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+
+#include <asm/rtc.h>
+
+#include "proto.h"
+
+
+/*
+ * Support for the RTC device.
+ *
+ * We don't want to use the rtc-cmos driver, because we don't want to support
+ * alarms, as that would be indistinguishable from timer interrupts.
+ *
+ * Further, generic code is really, really tied to a 1900 epoch.  This is
+ * true in __get_rtc_time as well as the users of struct rtc_time e.g.
+ * rtc_tm_to_time.  Thankfully all of the other epochs in use are later
+ * than 1900, and so it's easy to adjust.
+ */
+
+static unsigned long rtc_epoch;
+
+static int __init
+specifiy_epoch(char *str)
+{
+       unsigned long epoch = simple_strtoul(str, NULL, 0);
+       if (epoch < 1900)
+               printk("Ignoring invalid user specified epoch %lu\n", epoch);
+       else
+               rtc_epoch = epoch;
+       return 1;
+}
+__setup("epoch=", specifiy_epoch);
+
+static void __init
+init_rtc_epoch(void)
+{
+       int epoch, year, ctrl;
+
+       if (rtc_epoch != 0) {
+               /* The epoch was specified on the command-line.  */
+               return;
+       }
+
+       /* Detect the epoch in use on this computer.  */
+       ctrl = CMOS_READ(RTC_CONTROL);
+       year = CMOS_READ(RTC_YEAR);
+       if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+               year = bcd2bin(year);
+
+       /* PC-like is standard; used for year >= 70 */
+       epoch = 1900;
+       if (year < 20) {
+               epoch = 2000;
+       } else if (year >= 20 && year < 48) {
+               /* NT epoch */
+               epoch = 1980;
+       } else if (year >= 48 && year < 70) {
+               /* Digital UNIX epoch */
+               epoch = 1952;
+       }
+       rtc_epoch = epoch;
+
+       printk(KERN_INFO "Using epoch %d for rtc year %d\n", epoch, year);
+}
+
+static int
+alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       __get_rtc_time(tm);
+
+       /* Adjust for non-default epochs.  It's easier to depend on the
+          generic __get_rtc_time and adjust the epoch here than create
+          a copy of __get_rtc_time with the edits we need.  */
+       if (rtc_epoch != 1900) {
+               int year = tm->tm_year;
+               /* Undo the century adjustment made in __get_rtc_time.  */
+               if (year >= 100)
+                       year -= 100;
+               year += rtc_epoch - 1900;
+               /* Redo the century adjustment with the epoch in place.  */
+               if (year <= 69)
+                       year += 100;
+               tm->tm_year = year;
+       }
+
+       return rtc_valid_tm(tm);
+}
+
+static int
+alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       struct rtc_time xtm;
+
+       if (rtc_epoch != 1900) {
+               xtm = *tm;
+               xtm.tm_year -= rtc_epoch - 1900;
+               tm = &xtm;
+       }
+
+       return __set_rtc_time(tm);
+}
+
+static int
+alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime)
+{
+       int retval = 0;
+       int real_seconds, real_minutes, cmos_minutes;
+       unsigned char save_control, save_freq_select;
+
+       /* Note: This code only updates minutes and seconds.  Comments
+          indicate this was to avoid messing with unknown time zones,
+          and with the epoch nonsense described above.  In order for
+          this to work, the existing clock cannot be off by more than
+          15 minutes.
+
+          ??? This choice may be out of date.  The x86 port does
+          not have problems with timezones, and the epoch processing has
+          now been fixed in alpha_rtc_set_time.
+
+          In either case, one can always force a full rtc update with
+          the userland hwclock program, so surely 15 minute accuracy
+          is no real burden.  */
+
+       /* In order to set the CMOS clock precisely, we have to be called
+          500 ms after the second nowtime has started, because when
+          nowtime is written into the registers of the CMOS clock, it will
+          jump to the next second precisely 500 ms later. Check the Motorola
+          MC146818A or Dallas DS12887 data sheet for details.  */
+
+       /* irq are locally disabled here */
+       spin_lock(&rtc_lock);
+       /* Tell the clock it's being set */
+       save_control = CMOS_READ(RTC_CONTROL);
+       CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+
+       /* Stop and reset prescaler */
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+       CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+       cmos_minutes = CMOS_READ(RTC_MINUTES);
+       if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+               cmos_minutes = bcd2bin(cmos_minutes);
+
+       real_seconds = nowtime % 60;
+       real_minutes = nowtime / 60;
+       if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1) {
+               /* correct for half hour time zone */
+               real_minutes += 30;
+       }
+       real_minutes %= 60;
+
+       if (abs(real_minutes - cmos_minutes) < 30) {
+               if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+                       real_seconds = bin2bcd(real_seconds);
+                       real_minutes = bin2bcd(real_minutes);
+               }
+               CMOS_WRITE(real_seconds,RTC_SECONDS);
+               CMOS_WRITE(real_minutes,RTC_MINUTES);
+       } else {
+               printk_once(KERN_NOTICE
+                           "set_rtc_mmss: can't update from %d to %d\n",
+                           cmos_minutes, real_minutes);
+               retval = -1;
+       }
+
+       /* The following flags have to be released exactly in this order,
+        * otherwise the DS12887 (popular MC146818A clone with integrated
+        * battery and quartz) will not reset the oscillator and will not
+        * update precisely 500 ms later. You won't find this mentioned in
+        * the Dallas Semiconductor data sheets, but who believes data
+        * sheets anyway ...                           -- Markus Kuhn
+        */
+       CMOS_WRITE(save_control, RTC_CONTROL);
+       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+       spin_unlock(&rtc_lock);
+
+       return retval;
+}
+
+static int
+alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+       switch (cmd) {
+       case RTC_EPOCH_READ:
+               return put_user(rtc_epoch, (unsigned long __user *)arg);
+       case RTC_EPOCH_SET:
+               if (arg < 1900)
+                       return -EINVAL;
+               rtc_epoch = arg;
+               return 0;
+       default:
+               return -ENOIOCTLCMD;
+       }
+}
+
+static const struct rtc_class_ops alpha_rtc_ops = {
+       .read_time = alpha_rtc_read_time,
+       .set_time = alpha_rtc_set_time,
+       .set_mmss = alpha_rtc_set_mmss,
+       .ioctl = alpha_rtc_ioctl,
+};
+
+/*
+ * Similarly, except do the actual CMOS access on the boot cpu only.
+ * This requires marshalling the data across an interprocessor call.
+ */
+
+#if defined(CONFIG_SMP) && \
+    (defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_MARVEL))
+# define HAVE_REMOTE_RTC 1
+
+union remote_data {
+       struct rtc_time *tm;
+       unsigned long now;
+       long retval;
+};
+
+static void
+do_remote_read(void *data)
+{
+       union remote_data *x = data;
+       x->retval = alpha_rtc_read_time(NULL, x->tm);
+}
+
+static int
+remote_read_time(struct device *dev, struct rtc_time *tm)
+{
+       union remote_data x;
+       if (smp_processor_id() != boot_cpuid) {
+               x.tm = tm;
+               smp_call_function_single(boot_cpuid, do_remote_read, &x, 1);
+               return x.retval;
+       }
+       return alpha_rtc_read_time(NULL, tm);
+}
+
+static void
+do_remote_set(void *data)
+{
+       union remote_data *x = data;
+       x->retval = alpha_rtc_set_time(NULL, x->tm);
+}
+
+static int
+remote_set_time(struct device *dev, struct rtc_time *tm)
+{
+       union remote_data x;
+       if (smp_processor_id() != boot_cpuid) {
+               x.tm = tm;
+               smp_call_function_single(boot_cpuid, do_remote_set, &x, 1);
+               return x.retval;
+       }
+       return alpha_rtc_set_time(NULL, tm);
+}
+
+static void
+do_remote_mmss(void *data)
+{
+       union remote_data *x = data;
+       x->retval = alpha_rtc_set_mmss(NULL, x->now);
+}
+
+static int
+remote_set_mmss(struct device *dev, unsigned long now)
+{
+       union remote_data x;
+       if (smp_processor_id() != boot_cpuid) {
+               x.now = now;
+               smp_call_function_single(boot_cpuid, do_remote_mmss, &x, 1);
+               return x.retval;
+       }
+       return alpha_rtc_set_mmss(NULL, now);
+}
+
+static const struct rtc_class_ops remote_rtc_ops = {
+       .read_time = remote_read_time,
+       .set_time = remote_set_time,
+       .set_mmss = remote_set_mmss,
+       .ioctl = alpha_rtc_ioctl,
+};
+#endif
+
+static int __init
+alpha_rtc_init(void)
+{
+       const struct rtc_class_ops *ops;
+       struct platform_device *pdev;
+       struct rtc_device *rtc;
+       const char *name;
+
+       init_rtc_epoch();
+       name = "rtc-alpha";
+       ops = &alpha_rtc_ops;
+
+#ifdef HAVE_REMOTE_RTC
+       if (alpha_mv.rtc_boot_cpu_only)
+               ops = &remote_rtc_ops;
+#endif
+
+       pdev = platform_device_register_simple(name, -1, NULL, 0);
+       rtc = devm_rtc_device_register(&pdev->dev, name, ops, THIS_MODULE);
+       if (IS_ERR(rtc))
+               return PTR_ERR(rtc);
+
+       platform_set_drvdata(pdev, rtc);
+       return 0;
+}
+device_initcall(alpha_rtc_init);
index 9e3107cc5ebb45d7dccc2889462f9f10c4b575ec..b20af76f12c1dbf548a27210fdbb196a2c77496d 100644 (file)
@@ -115,10 +115,17 @@ unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE;
 
 #ifdef CONFIG_ALPHA_GENERIC
 struct alpha_machine_vector alpha_mv;
+#endif
+
+#ifndef alpha_using_srm
 int alpha_using_srm;
 EXPORT_SYMBOL(alpha_using_srm);
 #endif
 
+#ifndef alpha_using_qemu
+int alpha_using_qemu;
+#endif
+
 static struct alpha_machine_vector *get_sysvec(unsigned long, unsigned long,
                                               unsigned long);
 static struct alpha_machine_vector *get_sysvec_byname(const char *);
@@ -529,11 +536,15 @@ setup_arch(char **cmdline_p)
        atomic_notifier_chain_register(&panic_notifier_list,
                        &alpha_panic_block);
 
-#ifdef CONFIG_ALPHA_GENERIC
+#ifndef alpha_using_srm
        /* Assume that we've booted from SRM if we haven't booted from MILO.
           Detect the later by looking for "MILO" in the system serial nr.  */
        alpha_using_srm = strncmp((const char *)hwrpb->ssn, "MILO", 4) != 0;
 #endif
+#ifndef alpha_using_qemu
+       /* Similarly, look for QEMU.  */
+       alpha_using_qemu = strstr((const char *)hwrpb->ssn, "QEMU") != 0;
+#endif
 
        /* If we are using SRM, we want to allow callbacks
           as early as possible, so do this NOW, and then
@@ -1207,6 +1218,7 @@ show_cpuinfo(struct seq_file *f, void *slot)
        char *systype_name;
        char *sysvariation_name;
        int nr_processors;
+       unsigned long timer_freq;
 
        cpu_index = (unsigned) (cpu->type - 1);
        cpu_name = "Unknown";
@@ -1218,6 +1230,12 @@ show_cpuinfo(struct seq_file *f, void *slot)
 
        nr_processors = get_nr_processors(cpu, hwrpb->nr_processors);
 
+#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200
+       timer_freq = (100UL * hwrpb->intr_freq) / 4096;
+#else
+       timer_freq = 100UL * CONFIG_HZ;
+#endif
+
        seq_printf(f, "cpu\t\t\t: Alpha\n"
                      "cpu model\t\t: %s\n"
                      "cpu variation\t\t: %ld\n"
@@ -1243,8 +1261,7 @@ show_cpuinfo(struct seq_file *f, void *slot)
                       (char*)hwrpb->ssn,
                       est_cycle_freq ? : hwrpb->cycle_freq,
                       est_cycle_freq ? "est." : "",
-                      hwrpb->intr_freq / 4096,
-                      (100 * hwrpb->intr_freq / 4096) % 100,
+                      timer_freq / 100, timer_freq % 100,
                       hwrpb->pagesize,
                       hwrpb->pa_bits,
                       hwrpb->max_asn,
index 9dbbcb3b914675f80e3e0f097ced7e4545b533f9..99ac36d5de4efd10832804e82509e062606720e2 100644 (file)
@@ -138,9 +138,11 @@ smp_callin(void)
 
        /* Get our local ticker going. */
        smp_setup_percpu_timer(cpuid);
+       init_clockevent();
 
        /* Call platform-specific callin, if specified */
-       if (alpha_mv.smp_callin) alpha_mv.smp_callin();
+       if (alpha_mv.smp_callin)
+               alpha_mv.smp_callin();
 
        /* All kernel threads share the same mm context.  */
        atomic_inc(&init_mm.mm_count);
@@ -498,35 +500,6 @@ smp_cpus_done(unsigned int max_cpus)
               ((bogosum + 2500) / (5000/HZ)) % 100);
 }
 
-\f
-void
-smp_percpu_timer_interrupt(struct pt_regs *regs)
-{
-       struct pt_regs *old_regs;
-       int cpu = smp_processor_id();
-       unsigned long user = user_mode(regs);
-       struct cpuinfo_alpha *data = &cpu_data[cpu];
-
-       old_regs = set_irq_regs(regs);
-
-       /* Record kernel PC.  */
-       profile_tick(CPU_PROFILING);
-
-       if (!--data->prof_counter) {
-               /* We need to make like a normal interrupt -- otherwise
-                  timer interrupts ignore the global interrupt lock,
-                  which would be a Bad Thing.  */
-               irq_enter();
-
-               update_process_times(user);
-
-               data->prof_counter = data->prof_multiplier;
-
-               irq_exit();
-       }
-       set_irq_regs(old_regs);
-}
-
 int
 setup_profiling_timer(unsigned int multiplier)
 {
index 5a0af11b3a61c1b97b5e1da19fe1fb8f2dd15f50..608f2a7fa0a30f415e2bdef4c7424957080b8641 100644 (file)
@@ -224,8 +224,6 @@ struct alpha_machine_vector jensen_mv __initmv = {
        .machine_check          = jensen_machine_check,
        .max_isa_dma_address    = ALPHA_MAX_ISA_DMA_ADDRESS,
        .rtc_port               = 0x170,
-       .rtc_get_time           = common_get_rtc_time,
-       .rtc_set_time           = common_set_rtc_time,
 
        .nr_irqs                = 16,
        .device_interrupt       = jensen_device_interrupt,
index c92e389ff2192973f95f51073a5ce50e5f132a3b..f21d61fab6787331d21571958185b637fc601bb7 100644 (file)
@@ -22,7 +22,6 @@
 #include <asm/hwrpb.h>
 #include <asm/tlbflush.h>
 #include <asm/vga.h>
-#include <asm/rtc.h>
 
 #include "proto.h"
 #include "err_impl.h"
@@ -400,57 +399,6 @@ marvel_init_rtc(void)
        init_rtc_irq();
 }
 
-struct marvel_rtc_time {
-       struct rtc_time *time;
-       int retval;
-};
-
-#ifdef CONFIG_SMP
-static void
-smp_get_rtc_time(void *data)
-{
-       struct marvel_rtc_time *mrt = data;
-       mrt->retval = __get_rtc_time(mrt->time);
-}
-
-static void
-smp_set_rtc_time(void *data)
-{
-       struct marvel_rtc_time *mrt = data;
-       mrt->retval = __set_rtc_time(mrt->time);
-}
-#endif
-
-static unsigned int
-marvel_get_rtc_time(struct rtc_time *time)
-{
-#ifdef CONFIG_SMP
-       struct marvel_rtc_time mrt;
-
-       if (smp_processor_id() != boot_cpuid) {
-               mrt.time = time;
-               smp_call_function_single(boot_cpuid, smp_get_rtc_time, &mrt, 1);
-               return mrt.retval;
-       }
-#endif
-       return __get_rtc_time(time);
-}
-
-static int
-marvel_set_rtc_time(struct rtc_time *time)
-{
-#ifdef CONFIG_SMP
-       struct marvel_rtc_time mrt;
-
-       if (smp_processor_id() != boot_cpuid) {
-               mrt.time = time;
-               smp_call_function_single(boot_cpuid, smp_set_rtc_time, &mrt, 1);
-               return mrt.retval;
-       }
-#endif
-       return __set_rtc_time(time);
-}
-
 static void
 marvel_smp_callin(void)
 {
@@ -492,8 +440,7 @@ struct alpha_machine_vector marvel_ev7_mv __initmv = {
        .vector_name            = "MARVEL/EV7",
        DO_EV7_MMU,
        .rtc_port               = 0x70,
-       .rtc_get_time           = marvel_get_rtc_time,
-       .rtc_set_time           = marvel_set_rtc_time,
+       .rtc_boot_cpu_only      = 1,
        DO_MARVEL_IO,
        .machine_check          = marvel_machine_check,
        .max_isa_dma_address    = ALPHA_MAX_ISA_DMA_ADDRESS,
index ea3395036556ceea61c74c08452d5e7a7061e566..ee39cee8064caa4e930a06eb2fa35457ad6d390e 100644 (file)
@@ -3,13 +3,7 @@
  *
  *  Copyright (C) 1991, 1992, 1995, 1999, 2000  Linus Torvalds
  *
- * This file contains the PC-specific time handling details:
- * reading the RTC at bootup, etc..
- * 1994-07-02    Alan Modra
- *     fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
- * 1995-03-26    Markus Kuhn
- *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
- *      precision CMOS clock update
+ * This file contains the clocksource time handling.
  * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
  *             "A Kernel Model for Precision Timekeeping" by Dave Mills
  * 1997-01-09    Adrian Sun
@@ -21,9 +15,6 @@
  * 1999-04-16  Thorsten Kranzkowski (dl8bcu@gmx.net)
  *     fixed algorithm in do_gettimeofday() for calculating the precise time
  *     from processor cycle counter (now taking lost_ticks into account)
- * 2000-08-13  Jan-Benedict Glaw <jbglaw@lug-owl.de>
- *     Fixed time_init to be aware of epoches != 1900. This prevents
- *     booting up in 2048 for me;) Code is stolen from rtc.c.
  * 2003-06-03  R. Scott Bailey <scott.bailey@eds.com>
  *     Tighten sanity in time_init from 1% (10,000 PPM) to 250 PPM
  */
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/hwrpb.h>
-#include <asm/rtc.h>
 
 #include <linux/mc146818rtc.h>
 #include <linux/time.h>
 #include <linux/timex.h>
 #include <linux/clocksource.h>
+#include <linux/clockchips.h>
 
 #include "proto.h"
 #include "irq_impl.h"
 
-static int set_rtc_mmss(unsigned long);
-
 DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL(rtc_lock);
 
-#define TICK_SIZE (tick_nsec / 1000)
-
-/*
- * Shift amount by which scaled_ticks_per_cycle is scaled.  Shifting
- * by 48 gives us 16 bits for HZ while keeping the accuracy good even
- * for large CPU clock rates.
- */
-#define FIX_SHIFT      48
-
-/* lump static variables together for more efficient access: */
-static struct {
-       /* cycle counter last time it got invoked */
-       __u32 last_time;
-       /* ticks/cycle * 2^48 */
-       unsigned long scaled_ticks_per_cycle;
-       /* partial unused tick */
-       unsigned long partial_tick;
-} state;
-
 unsigned long est_cycle_freq;
 
 #ifdef CONFIG_IRQ_WORK
@@ -108,109 +78,156 @@ static inline __u32 rpcc(void)
        return __builtin_alpha_rpcc();
 }
 
-int update_persistent_clock(struct timespec now)
-{
-       return set_rtc_mmss(now.tv_sec);
-}
 
-void read_persistent_clock(struct timespec *ts)
+\f
+/*
+ * The RTC as a clock_event_device primitive.
+ */
+
+static DEFINE_PER_CPU(struct clock_event_device, cpu_ce);
+
+irqreturn_t
+rtc_timer_interrupt(int irq, void *dev)
 {
-       unsigned int year, mon, day, hour, min, sec, epoch;
-
-       sec = CMOS_READ(RTC_SECONDS);
-       min = CMOS_READ(RTC_MINUTES);
-       hour = CMOS_READ(RTC_HOURS);
-       day = CMOS_READ(RTC_DAY_OF_MONTH);
-       mon = CMOS_READ(RTC_MONTH);
-       year = CMOS_READ(RTC_YEAR);
-
-       if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-               sec = bcd2bin(sec);
-               min = bcd2bin(min);
-               hour = bcd2bin(hour);
-               day = bcd2bin(day);
-               mon = bcd2bin(mon);
-               year = bcd2bin(year);
-       }
+       int cpu = smp_processor_id();
+       struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
 
-       /* PC-like is standard; used for year >= 70 */
-       epoch = 1900;
-       if (year < 20)
-               epoch = 2000;
-       else if (year >= 20 && year < 48)
-               /* NT epoch */
-               epoch = 1980;
-       else if (year >= 48 && year < 70)
-               /* Digital UNIX epoch */
-               epoch = 1952;
+       /* Don't run the hook for UNUSED or SHUTDOWN.  */
+       if (likely(ce->mode == CLOCK_EVT_MODE_PERIODIC))
+               ce->event_handler(ce);
 
-       printk(KERN_INFO "Using epoch = %d\n", epoch);
+       if (test_irq_work_pending()) {
+               clear_irq_work_pending();
+               irq_work_run();
+       }
 
-       if ((year += epoch) < 1970)
-               year += 100;
+       return IRQ_HANDLED;
+}
 
-       ts->tv_sec = mktime(year, mon, day, hour, min, sec);
-       ts->tv_nsec = 0;
+static void
+rtc_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce)
+{
+       /* The mode member of CE is updated in generic code.
+          Since we only support periodic events, nothing to do.  */
+}
+
+static int
+rtc_ce_set_next_event(unsigned long evt, struct clock_event_device *ce)
+{
+       /* This hook is for oneshot mode, which we don't support.  */
+       return -EINVAL;
 }
 
+static void __init
+init_rtc_clockevent(void)
+{
+       int cpu = smp_processor_id();
+       struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
+
+       *ce = (struct clock_event_device){
+               .name = "rtc",
+               .features = CLOCK_EVT_FEAT_PERIODIC,
+               .rating = 100,
+               .cpumask = cpumask_of(cpu),
+               .set_mode = rtc_ce_set_mode,
+               .set_next_event = rtc_ce_set_next_event,
+       };
 
+       clockevents_config_and_register(ce, CONFIG_HZ, 0, 0);
+}
 
+\f
 /*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "xtime_update()" routine every clocktick
+ * The QEMU clock as a clocksource primitive.
  */
-irqreturn_t timer_interrupt(int irq, void *dev)
+
+static cycle_t
+qemu_cs_read(struct clocksource *cs)
 {
-       unsigned long delta;
-       __u32 now;
-       long nticks;
+       return qemu_get_vmtime();
+}
 
-#ifndef CONFIG_SMP
-       /* Not SMP, do kernel PC profiling here.  */
-       profile_tick(CPU_PROFILING);
-#endif
+static struct clocksource qemu_cs = {
+       .name                   = "qemu",
+       .rating                 = 400,
+       .read                   = qemu_cs_read,
+       .mask                   = CLOCKSOURCE_MASK(64),
+       .flags                  = CLOCK_SOURCE_IS_CONTINUOUS,
+       .max_idle_ns            = LONG_MAX
+};
 
-       /*
-        * Calculate how many ticks have passed since the last update,
-        * including any previous partial leftover.  Save any resulting
-        * fraction for the next pass.
-        */
-       now = rpcc();
-       delta = now - state.last_time;
-       state.last_time = now;
-       delta = delta * state.scaled_ticks_per_cycle + state.partial_tick;
-       state.partial_tick = delta & ((1UL << FIX_SHIFT) - 1); 
-       nticks = delta >> FIX_SHIFT;
 
-       if (nticks)
-               xtime_update(nticks);
+/*
+ * The QEMU alarm as a clock_event_device primitive.
+ */
 
-       if (test_irq_work_pending()) {
-               clear_irq_work_pending();
-               irq_work_run();
-       }
+static void
+qemu_ce_set_mode(enum clock_event_mode mode, struct clock_event_device *ce)
+{
+       /* The mode member of CE is updated for us in generic code.
+          Just make sure that the event is disabled.  */
+       qemu_set_alarm_abs(0);
+}
 
-#ifndef CONFIG_SMP
-       while (nticks--)
-               update_process_times(user_mode(get_irq_regs()));
-#endif
+static int
+qemu_ce_set_next_event(unsigned long evt, struct clock_event_device *ce)
+{
+       qemu_set_alarm_rel(evt);
+       return 0;
+}
 
+static irqreturn_t
+qemu_timer_interrupt(int irq, void *dev)
+{
+       int cpu = smp_processor_id();
+       struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
+
+       ce->event_handler(ce);
        return IRQ_HANDLED;
 }
 
+static void __init
+init_qemu_clockevent(void)
+{
+       int cpu = smp_processor_id();
+       struct clock_event_device *ce = &per_cpu(cpu_ce, cpu);
+
+       *ce = (struct clock_event_device){
+               .name = "qemu",
+               .features = CLOCK_EVT_FEAT_ONESHOT,
+               .rating = 400,
+               .cpumask = cpumask_of(cpu),
+               .set_mode = qemu_ce_set_mode,
+               .set_next_event = qemu_ce_set_next_event,
+       };
+
+       clockevents_config_and_register(ce, NSEC_PER_SEC, 1000, LONG_MAX);
+}
+
+\f
 void __init
 common_init_rtc(void)
 {
-       unsigned char x;
+       unsigned char x, sel = 0;
 
        /* Reset periodic interrupt frequency.  */
-       x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f;
-        /* Test includes known working values on various platforms
-           where 0x26 is wrong; we refuse to change those. */
-       if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) {
-               printk("Setting RTC_FREQ to 1024 Hz (%x)\n", x);
-               CMOS_WRITE(0x26, RTC_FREQ_SELECT);
+#if CONFIG_HZ == 1024 || CONFIG_HZ == 1200
+       x = CMOS_READ(RTC_FREQ_SELECT) & 0x3f;
+       /* Test includes known working values on various platforms
+          where 0x26 is wrong; we refuse to change those. */
+       if (x != 0x26 && x != 0x25 && x != 0x19 && x != 0x06) {
+               sel = RTC_REF_CLCK_32KHZ + 6;
        }
+#elif CONFIG_HZ == 256 || CONFIG_HZ == 128 || CONFIG_HZ == 64 || CONFIG_HZ == 32
+       sel = RTC_REF_CLCK_32KHZ + __builtin_ffs(32768 / CONFIG_HZ);
+#else
+# error "Unknown HZ from arch/alpha/Kconfig"
+#endif
+       if (sel) {
+               printk(KERN_INFO "Setting RTC_FREQ to %d Hz (%x)\n",
+                      CONFIG_HZ, sel);
+               CMOS_WRITE(sel, RTC_FREQ_SELECT);
+       }
 
        /* Turn on periodic interrupts.  */
        x = CMOS_READ(RTC_CONTROL);
@@ -233,16 +250,37 @@ common_init_rtc(void)
        init_rtc_irq();
 }
 
-unsigned int common_get_rtc_time(struct rtc_time *time)
-{
-       return __get_rtc_time(time);
-}
+\f
+#ifndef CONFIG_ALPHA_WTINT
+/*
+ * The RPCC as a clocksource primitive.
+ *
+ * While we have free-running timecounters running on all CPUs, and we make
+ * a half-hearted attempt in init_rtc_rpcc_info to sync the timecounter
+ * with the wall clock, that initialization isn't kept up-to-date across
+ * different time counters in SMP mode.  Therefore we can only use this
+ * method when there's only one CPU enabled.
+ *
+ * When using the WTINT PALcall, the RPCC may shift to a lower frequency,
+ * or stop altogether, while waiting for the interrupt.  Therefore we cannot
+ * use this method when WTINT is in use.
+ */
 
-int common_set_rtc_time(struct rtc_time *time)
+static cycle_t read_rpcc(struct clocksource *cs)
 {
-       return __set_rtc_time(time);
+       return rpcc();
 }
 
+static struct clocksource clocksource_rpcc = {
+       .name                   = "rpcc",
+       .rating                 = 300,
+       .read                   = read_rpcc,
+       .mask                   = CLOCKSOURCE_MASK(32),
+       .flags                  = CLOCK_SOURCE_IS_CONTINUOUS
+};
+#endif /* ALPHA_WTINT */
+
+\f
 /* Validate a computed cycle counter result against the known bounds for
    the given processor core.  There's too much brokenness in the way of
    timing hardware for any one method to work everywhere.  :-(
@@ -353,33 +391,6 @@ rpcc_after_update_in_progress(void)
        return rpcc();
 }
 
-#ifndef CONFIG_SMP
-/* Until and unless we figure out how to get cpu cycle counters
-   in sync and keep them there, we can't use the rpcc.  */
-static cycle_t read_rpcc(struct clocksource *cs)
-{
-       cycle_t ret = (cycle_t)rpcc();
-       return ret;
-}
-
-static struct clocksource clocksource_rpcc = {
-       .name                   = "rpcc",
-       .rating                 = 300,
-       .read                   = read_rpcc,
-       .mask                   = CLOCKSOURCE_MASK(32),
-       .flags                  = CLOCK_SOURCE_IS_CONTINUOUS
-};
-
-static inline void register_rpcc_clocksource(long cycle_freq)
-{
-       clocksource_register_hz(&clocksource_rpcc, cycle_freq);
-}
-#else /* !CONFIG_SMP */
-static inline void register_rpcc_clocksource(long cycle_freq)
-{
-}
-#endif /* !CONFIG_SMP */
-
 void __init
 time_init(void)
 {
@@ -387,6 +398,15 @@ time_init(void)
        unsigned long cycle_freq, tolerance;
        long diff;
 
+       if (alpha_using_qemu) {
+               clocksource_register_hz(&qemu_cs, NSEC_PER_SEC);
+               init_qemu_clockevent();
+
+               timer_irqaction.handler = qemu_timer_interrupt;
+               init_rtc_irq();
+               return;
+       }
+
        /* Calibrate CPU clock -- attempt #1.  */
        if (!est_cycle_freq)
                est_cycle_freq = validate_cc_value(calibrate_cc_with_pit());
@@ -421,100 +441,25 @@ time_init(void)
                       "and unable to estimate a proper value!\n");
        }
 
-       /* From John Bowman <bowman@math.ualberta.ca>: allow the values
-          to settle, as the Update-In-Progress bit going low isn't good
-          enough on some hardware.  2ms is our guess; we haven't found 
-          bogomips yet, but this is close on a 500Mhz box.  */
-       __delay(1000000);
-
-
-       if (HZ > (1<<16)) {
-               extern void __you_loose (void);
-               __you_loose();
-       }
-
-       register_rpcc_clocksource(cycle_freq);
-
-       state.last_time = cc1;
-       state.scaled_ticks_per_cycle
-               = ((unsigned long) HZ << FIX_SHIFT) / cycle_freq;
-       state.partial_tick = 0L;
+       /* See above for restrictions on using clocksource_rpcc.  */
+#ifndef CONFIG_ALPHA_WTINT
+       if (hwrpb->nr_processors == 1)
+               clocksource_register_hz(&clocksource_rpcc, cycle_freq);
+#endif
 
        /* Startup the timer source. */
        alpha_mv.init_rtc();
+       init_rtc_clockevent();
 }
 
-/*
- * In order to set the CMOS clock precisely, set_rtc_mmss has to be
- * called 500 ms after the second nowtime has started, because when
- * nowtime is written into the registers of the CMOS clock, it will
- * jump to the next second precisely 500 ms later. Check the Motorola
- * MC146818A or Dallas DS12887 data sheet for details.
- *
- * BUG: This routine does not handle hour overflow properly; it just
- *      sets the minutes. Usually you won't notice until after reboot!
- */
-
-
-static int
-set_rtc_mmss(unsigned long nowtime)
+/* Initialize the clock_event_device for secondary cpus.  */
+#ifdef CONFIG_SMP
+void __init
+init_clockevent(void)
 {
-       int retval = 0;
-       int real_seconds, real_minutes, cmos_minutes;
-       unsigned char save_control, save_freq_select;
-
-       /* irq are locally disabled here */
-       spin_lock(&rtc_lock);
-       /* Tell the clock it's being set */
-       save_control = CMOS_READ(RTC_CONTROL);
-       CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
-       /* Stop and reset prescaler */
-       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-       CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
-       cmos_minutes = CMOS_READ(RTC_MINUTES);
-       if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               cmos_minutes = bcd2bin(cmos_minutes);
-
-       /*
-        * since we're only adjusting minutes and seconds,
-        * don't interfere with hour overflow. This avoids
-        * messing with unknown time zones but requires your
-        * RTC not to be off by more than 15 minutes
-        */
-       real_seconds = nowtime % 60;
-       real_minutes = nowtime / 60;
-       if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) {
-               /* correct for half hour time zone */
-               real_minutes += 30;
-       }
-       real_minutes %= 60;
-
-       if (abs(real_minutes - cmos_minutes) < 30) {
-               if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-                       real_seconds = bin2bcd(real_seconds);
-                       real_minutes = bin2bcd(real_minutes);
-               }
-               CMOS_WRITE(real_seconds,RTC_SECONDS);
-               CMOS_WRITE(real_minutes,RTC_MINUTES);
-       } else {
-               printk_once(KERN_NOTICE
-                      "set_rtc_mmss: can't update from %d to %d\n",
-                      cmos_minutes, real_minutes);
-               retval = -1;
-       }
-
-       /* The following flags have to be released exactly in this order,
-        * otherwise the DS12887 (popular MC146818A clone with integrated
-        * battery and quartz) will not reset the oscillator and will not
-        * update precisely 500 ms later. You won't find this mentioned in
-        * the Dallas Semiconductor data sheets, but who believes data
-        * sheets anyway ...                           -- Markus Kuhn
-        */
-       CMOS_WRITE(save_control, RTC_CONTROL);
-       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-       spin_unlock(&rtc_lock);
-
-       return retval;
+       if (alpha_using_qemu)
+               init_qemu_clockevent();
+       else
+               init_rtc_clockevent();
 }
+#endif
index bd0665cdc840d3e9a79b752ec3a88279ede69344..9c4c189eb22f5a9db2d2ae678756a5241b3e1ee5 100644 (file)
@@ -241,6 +241,21 @@ do_entIF(unsigned long type, struct pt_regs *regs)
                               (const char *)(data[1] | (long)data[2] << 32), 
                               data[0]);
                }
+#ifdef CONFIG_ALPHA_WTINT
+               if (type == 4) {
+                       /* If CALL_PAL WTINT is totally unsupported by the
+                          PALcode, e.g. MILO, "emulate" it by overwriting
+                          the insn.  */
+                       unsigned int *pinsn
+                         = (unsigned int *) regs->pc - 1;
+                       if (*pinsn == PAL_wtint) {
+                               *pinsn = 0x47e01400; /* mov 0,$0 */
+                               imb();
+                               regs->r0 = 0;
+                               return;
+                       }
+               }
+#endif /* ALPHA_WTINT */
                die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"),
                              regs, type, NULL);
        }
index ffb19b7da999c67722d5690dcfcd6ef0d74123f8..ff3c10721caf67be1873f5c5b88623fc65fd89ea 100644 (file)
@@ -130,7 +130,7 @@ csum_partial_cfu_aligned(const unsigned long __user *src, unsigned long *dst,
                *dst = word | tmp;
                checksum += carry;
        }
-       if (err) *errp = err;
+       if (err && errp) *errp = err;
        return checksum;
 }
 
@@ -185,7 +185,7 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src,
                *dst = word | tmp;
                checksum += carry;
        }
-       if (err) *errp = err;
+       if (err && errp) *errp = err;
        return checksum;
 }
 
@@ -242,7 +242,7 @@ csum_partial_cfu_src_aligned(const unsigned long __user *src,
        stq_u(partial_dest | second_dest, dst);
 out:
        checksum += carry;
-       if (err) *errp = err;
+       if (err && errp) *errp = err;
        return checksum;
 }
 
@@ -325,7 +325,7 @@ csum_partial_cfu_unaligned(const unsigned long __user * src,
                stq_u(partial_dest | word | second_dest, dst);
                checksum += carry;
        }
-       if (err) *errp = err;
+       if (err && errp) *errp = err;
        return checksum;
 }
 
@@ -339,7 +339,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len,
 
        if (len) {
                if (!access_ok(VERIFY_READ, src, len)) {
-                       *errp = -EFAULT;
+                       if (errp) *errp = -EFAULT;
                        memset(dst, 0, len);
                        return sum;
                }
index d8b94e1c7fcad001c32142f4e4cc51d3cc8f6a5b..356bb2fdd70567721023b8e0a3fc1d59f2f5d981 100644 (file)
        .set noat
        .set noreorder
 .text
+       .globl memset
        .globl __memset
+       .globl ___memset
        .globl __memsetw
        .globl __constant_c_memset
-       .globl memset
 
-       .ent __memset
+       .ent ___memset
 .align 5
-__memset:
+___memset:
        .frame $30,0,$26,0
        .prologue 0
 
@@ -227,7 +228,7 @@ end_b:
        nop
        nop
        ret $31,($26),1         # L0 :
-       .end __memset
+       .end ___memset
 
        /*
         * This is the original body of code, prior to replication and
@@ -594,4 +595,5 @@ end_w:
 
        .end __memsetw
 
-memset = __memset
+memset = ___memset
+__memset = ___memset
index 311b8cfc691488743d178c2dd79eb9f7edafbd81..76ccc6d1f364d67ca8c03c859171f113da5e23ce 100644 (file)
 .text
        .globl memset
        .globl __memset
+       .globl ___memset
        .globl __memsetw
        .globl __constant_c_memset
-       .ent __memset
+
+       .ent ___memset
 .align 5
-__memset:
+___memset:
        .frame $30,0,$26,0
        .prologue 0
 
@@ -103,7 +105,7 @@ within_one_quad:
 
 end:
        ret $31,($26),1         /* E1 */
-       .end __memset
+       .end ___memset
 
        .align 5
        .ent __memsetw
@@ -121,4 +123,5 @@ __memsetw:
 
        .end __memsetw
 
-memset = __memset
+memset = ___memset
+__memset = ___memset
index 214b698cefea895e35b345dc9e8d070d33cf6929..c1f1a7eee953de4378b1f74bd4907c969f96dceb 100644 (file)
@@ -25,7 +25,7 @@ config ARM
        select HARDIRQS_SW_RESEND
        select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
        select HAVE_ARCH_KGDB
-       select HAVE_ARCH_SECCOMP_FILTER
+       select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
        select HAVE_ARCH_TRACEHOOK
        select HAVE_BPF_JIT
        select HAVE_CONTEXT_TRACKING
@@ -1496,6 +1496,7 @@ config HAVE_ARM_ARCH_TIMER
        bool "Architected timer support"
        depends on CPU_V7
        select ARM_ARCH_TIMER
+       select GENERIC_CLOCKEVENTS
        help
          This option enables support for the ARM architected timer
 
@@ -1719,7 +1720,6 @@ config AEABI
 config OABI_COMPAT
        bool "Allow old ABI binaries to run with this kernel (EXPERIMENTAL)"
        depends on AEABI && !THUMB2_KERNEL
-       default y
        help
          This option preserves the old syscall interface along with the
          new (ARM EABI) one. It also provides a compatibility layer to
@@ -1727,11 +1727,16 @@ config OABI_COMPAT
          in memory differs between the legacy ABI and the new ARM EABI
          (only for non "thumb" binaries). This option adds a tiny
          overhead to all syscalls and produces a slightly larger kernel.
+
+         The seccomp filter system will not be available when this is
+         selected, since there is no way yet to sensibly distinguish
+         between calling conventions during filtering.
+
          If you know you'll be using only pure EABI user space then you
          can say N here. If this option is not selected and you attempt
          to execute a legacy ABI binary then the result will be
          UNPREDICTABLE (in fact it can be predicted that it won't work
-         at all). If in doubt say Y.
+         at all). If in doubt say N.
 
 config ARCH_HAS_HOLES_MEMORYMODEL
        bool
index 8e1a0245907f85be1a460bfa785f744daf285d6f..41bca32409fce81358c3b5c35bc081bcc28e7c76 100644 (file)
@@ -404,7 +404,7 @@ static irqreturn_t dma_irq_handler(int irq, void *data)
                                        BIT(slot));
                        if (edma_cc[ctlr]->intr_data[channel].callback)
                                edma_cc[ctlr]->intr_data[channel].callback(
-                                       channel, DMA_COMPLETE,
+                                       channel, EDMA_DMA_COMPLETE,
                                        edma_cc[ctlr]->intr_data[channel].data);
                }
        } while (sh_ipr);
@@ -459,7 +459,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
                                                                callback) {
                                                edma_cc[ctlr]->intr_data[k].
                                                callback(k,
-                                               DMA_CC_ERROR,
+                                               EDMA_DMA_CC_ERROR,
                                                edma_cc[ctlr]->intr_data
                                                [k].data);
                                        }
index 9b28f1243bdc1d96c2be3dac0ad947a72a61a264..240b29ef17db9772af6abc4855b90c7c16621e81 100644 (file)
@@ -393,36 +393,6 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
        return slot_cnt;
 }
 
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
-       return 0;
-}
-
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
-                                       struct iop_adma_chan *chan)
-{
-       union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-       switch (chan->device->id) {
-       case DMA0_ID:
-       case DMA1_ID:
-               return hw_desc.dma->dest_addr;
-       case AAU_ID:
-               return hw_desc.aau->dest_addr;
-       default:
-               BUG();
-       }
-       return 0;
-}
-
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
-                                         struct iop_adma_chan *chan)
-{
-       BUG();
-       return 0;
-}
-
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
 {
index 122f86d8c991d73e587c786eba41bf85a6ec5a21..250760e081039542dbcdae191b1b5321921a1d3d 100644 (file)
@@ -82,8 +82,6 @@ struct iop_adma_chan {
  * @slot_cnt: total slots used in an transaction (group of operations)
  * @slots_per_op: number of slots per operation
  * @idx: pool index
- * @unmap_src_cnt: number of xor sources
- * @unmap_len: transaction bytecount
  * @tx_list: list of descriptors that are associated with one operation
  * @async_tx: support for the async_tx api
  * @group_list: list of slots that make up a multi-descriptor transaction
@@ -99,8 +97,6 @@ struct iop_adma_desc_slot {
        u16 slot_cnt;
        u16 slots_per_op;
        u16 idx;
-       u16 unmap_src_cnt;
-       size_t unmap_len;
        struct list_head tx_list;
        struct dma_async_tx_descriptor async_tx;
        union {
index 4dd21457ef9d2be8b1c94cac7eea97c8ef8cc1f6..9ecccc865046a2c257277cd03a8f607ed5e0217d 100644 (file)
@@ -226,7 +226,14 @@ static inline phys_addr_t __virt_to_phys(unsigned long x)
 static inline unsigned long __phys_to_virt(phys_addr_t x)
 {
        unsigned long t;
-       __pv_stub(x, t, "sub", __PV_BITS_31_24);
+
+       /*
+        * The 'unsigned long' cast discards the upper word when
+        * phys_addr_t is 64 bit, and makes sure that the inline
+        * assembler expression receives a 32 bit argument in the
+        * place where an 'r' 32 bit operand is expected.
+        */
+       __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24);
        return t;
 }
 
index 7801866e626a2a1a4631d9e3e3fbd3c27ddda429..11d59b32fb8dca45613ed00fb225a72359c19216 100644 (file)
@@ -508,6 +508,7 @@ __fixup_smp:
        teq     r0, #0x0                @ '0' on actual UP A9 hardware
        beq     __fixup_smp_on_up       @ So its an A9 UP
        ldr     r0, [r0, #4]            @ read SCU Config
+ARM_BE8(rev    r0, r0)                 @ byteswap if big endian
        and     r0, r0, #0x3            @ number of CPUs
        teq     r0, #0x0                @ is 1?
        movne   pc, lr
@@ -643,8 +644,12 @@ ARM_BE8(rev16      ip, ip)
        ldrcc   r7, [r4], #4    @ use branch for delay slot
        bcc     1b
        bx      lr
+#else
+#ifdef CONFIG_CPU_ENDIAN_BE8
+       moveq   r0, #0x00004000 @ set bit 22, mov to mvn instruction
 #else
        moveq   r0, #0x400000   @ set bit 22, mov to mvn instruction
+#endif
        b       2f
 1:     ldr     ip, [r7, r3]
 #ifdef CONFIG_CPU_ENDIAN_BE8
@@ -653,7 +658,7 @@ ARM_BE8(rev16       ip, ip)
        tst     ip, #0x000f0000 @ check the rotation field
        orrne   ip, ip, r6, lsl #24 @ mask in offset bits 31-24
        biceq   ip, ip, #0x00004000 @ clear bit 22
-       orreq   ip, ip, r0, lsl #24 @ mask in offset bits 7-0
+       orreq   ip, ip, r0      @ mask in offset bits 7-0
 #else
        bic     ip, ip, #0x000000ff
        tst     ip, #0xf00      @ check the rotation field
index 6125f259b7b5359072b0cd7a07e122fcd2bda4bd..dbf0923e8d76bda9392b902e0c8e500025d70402 100644 (file)
@@ -856,7 +856,7 @@ static void __init kuser_init(void *vectors)
                memcpy(vectors + 0xfe0, vectors + 0xfe8, 4);
 }
 #else
-static void __init kuser_init(void *vectors)
+static inline void __init kuser_init(void *vectors)
 {
 }
 #endif
index 371958370de445fbe0cc200e772fa1e9426fb327..580906989db1091eb034ada5cc60570505de8cd1 100644 (file)
@@ -334,6 +334,17 @@ out:
        return err;
 }
 
+static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
+{
+       if (!is_vmalloc_addr(kaddr)) {
+               BUG_ON(!virt_addr_valid(kaddr));
+               return __pa(kaddr);
+       } else {
+               return page_to_phys(vmalloc_to_page(kaddr)) +
+                      offset_in_page(kaddr);
+       }
+}
+
 /**
  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:      The virtual kernel start address of the range
@@ -345,16 +356,27 @@ out:
  */
 int create_hyp_mappings(void *from, void *to)
 {
-       unsigned long phys_addr = virt_to_phys(from);
+       phys_addr_t phys_addr;
+       unsigned long virt_addr;
        unsigned long start = KERN_TO_HYP((unsigned long)from);
        unsigned long end = KERN_TO_HYP((unsigned long)to);
 
-       /* Check for a valid kernel memory mapping */
-       if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
-               return -EINVAL;
+       start = start & PAGE_MASK;
+       end = PAGE_ALIGN(end);
 
-       return __create_hyp_mappings(hyp_pgd, start, end,
-                                    __phys_to_pfn(phys_addr), PAGE_HYP);
+       for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
+               int err;
+
+               phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
+               err = __create_hyp_mappings(hyp_pgd, virt_addr,
+                                           virt_addr + PAGE_SIZE,
+                                           __phys_to_pfn(phys_addr),
+                                           PAGE_HYP);
+               if (err)
+                       return err;
+       }
+
+       return 0;
 }
 
 /**
index e0c68d5bb7dc25dd3fa93dc0fa1b3899f5b09019..52886b89706caf466b1cc6c6586db70a7d9d962e 100644 (file)
@@ -10,7 +10,7 @@ UNWIND(       .fnstart        )
        and     r3, r0, #31             @ Get bit offset
        mov     r0, r0, lsr #5
        add     r1, r1, r0, lsl #2      @ Get word offset
-#if __LINUX_ARM_ARCH__ >= 7
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
        .arch_extension mp
        ALT_SMP(W(pldw) [r1])
        ALT_UP(W(nop))
index 6d3782d85a9ff6d2db65a71de2632cc0b1151b33..a86fd0ed775788012197270c96ea8190282aed47 100644 (file)
@@ -218,20 +218,6 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
 #define iop_chan_pq_slot_count iop_chan_xor_slot_count
 #define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
 
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
-                                       struct iop_adma_chan *chan)
-{
-       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-       return hw_desc->dest_addr;
-}
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
-                                         struct iop_adma_chan *chan)
-{
-       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-       return hw_desc->q_dest_addr;
-}
-
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
 {
@@ -350,18 +336,6 @@ iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
        hw_desc->desc_ctrl = u_desc_ctrl.value;
 }
 
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
-       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-       union {
-               u32 value;
-               struct iop13xx_adma_desc_ctrl field;
-       } u_desc_ctrl;
-
-       u_desc_ctrl.value = hw_desc->desc_ctrl;
-       return u_desc_ctrl.field.pq_xfer_en;
-}
-
 static inline void
 iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
                          unsigned long flags)
index 78eeeca78f5ab331707fcd73b4956c503c2d880b..580ef2de82d728f8ecfde5f5f3b208a2e5525b06 100644 (file)
@@ -558,8 +558,8 @@ static void __init build_mem_type_table(void)
                mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
                break;
        }
-       printk("Memory policy: ECC %sabled, Data cache %s\n",
-               ecc_mask ? "en" : "dis", cp->policy);
+       pr_info("Memory policy: %sData cache %s\n",
+               ecc_mask ? "ECC enabled, " : "", cp->policy);
 
        for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
                struct mem_type *t = &mem_types[i];
index 5c668b7a31f97e6df35dcec53b79d5a70a9d1d0b..55764a7ef1f021934ba2f0b0136fc1b1df2a2799 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/mach/arch.h>
 #include <asm/cputype.h>
 #include <asm/mpu.h>
+#include <asm/procinfo.h>
 
 #include "mm.h"
 
index 60920f62fdf5994f04477bc94aba09dcd349385a..bd1781979a391825043d078192666e5a846cdae5 100644 (file)
@@ -92,7 +92,7 @@ ENDPROC(cpu_v7_dcache_clean_area)
 
 /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */
 .globl cpu_v7_suspend_size
-.equ   cpu_v7_suspend_size, 4 * 8
+.equ   cpu_v7_suspend_size, 4 * 9
 #ifdef CONFIG_ARM_CPU_SUSPEND
 ENTRY(cpu_v7_do_suspend)
        stmfd   sp!, {r4 - r10, lr}
@@ -101,13 +101,17 @@ ENTRY(cpu_v7_do_suspend)
        stmia   r0!, {r4 - r5}
 #ifdef CONFIG_MMU
        mrc     p15, 0, r6, c3, c0, 0   @ Domain ID
+#ifdef CONFIG_ARM_LPAE
+       mrrc    p15, 1, r5, r7, c2      @ TTB 1
+#else
        mrc     p15, 0, r7, c2, c0, 1   @ TTB 1
+#endif
        mrc     p15, 0, r11, c2, c0, 2  @ TTB control register
 #endif
        mrc     p15, 0, r8, c1, c0, 0   @ Control register
        mrc     p15, 0, r9, c1, c0, 1   @ Auxiliary control register
        mrc     p15, 0, r10, c1, c0, 2  @ Co-processor access control
-       stmia   r0, {r6 - r11}
+       stmia   r0, {r5 - r11}
        ldmfd   sp!, {r4 - r10, pc}
 ENDPROC(cpu_v7_do_suspend)
 
@@ -118,16 +122,19 @@ ENTRY(cpu_v7_do_resume)
        ldmia   r0!, {r4 - r5}
        mcr     p15, 0, r4, c13, c0, 0  @ FCSE/PID
        mcr     p15, 0, r5, c13, c0, 3  @ User r/o thread ID
-       ldmia   r0, {r6 - r11}
+       ldmia   r0, {r5 - r11}
 #ifdef CONFIG_MMU
        mcr     p15, 0, ip, c8, c7, 0   @ invalidate TLBs
        mcr     p15, 0, r6, c3, c0, 0   @ Domain ID
-#ifndef CONFIG_ARM_LPAE
+#ifdef CONFIG_ARM_LPAE
+       mcrr    p15, 0, r1, ip, c2      @ TTB 0
+       mcrr    p15, 1, r5, r7, c2      @ TTB 1
+#else
        ALT_SMP(orr     r1, r1, #TTB_FLAGS_SMP)
        ALT_UP(orr      r1, r1, #TTB_FLAGS_UP)
-#endif
        mcr     p15, 0, r1, c2, c0, 0   @ TTB 0
        mcr     p15, 0, r7, c2, c0, 1   @ TTB 1
+#endif
        mcr     p15, 0, r11, c2, c0, 2  @ TTB control register
        ldr     r4, =PRRR               @ PRRR
        ldr     r5, =NMRR               @ NMRR
index 4488fa27fe948c2e73018e962628c7883327bf07..2ffc298f061b3e7704e89dd634ece905ff13dcf4 100644 (file)
@@ -8,6 +8,8 @@
  * published by the Free Software Foundation.
  */
 #include <asm/setup.h>
+#include <asm/thread_info.h>
+#include <asm/sysreg.h>
 
        /*
         * The kernel is loaded where we want it to be and all caches
        .section .init.text,"ax"
        .global _start
 _start:
-       /* Check if the boot loader actually provided a tag table */
-       lddpc   r0, magic_number
-       cp.w    r12, r0
-       brne    no_tag_table
-
        /* Initialize .bss */
        lddpc   r2, bss_start_addr
        lddpc   r3, end_addr
@@ -34,6 +31,25 @@ _start:
        cp      r2, r3
        brlo    1b
 
+       /* Initialize status register */
+       lddpc   r0, init_sr
+       mtsr    SYSREG_SR, r0
+
+       /* Set initial stack pointer */
+       lddpc   sp, stack_addr
+       sub     sp, -THREAD_SIZE
+
+#ifdef CONFIG_FRAME_POINTER
+       /* Mark last stack frame */
+       mov     lr, 0
+       mov     r7, 0
+#endif
+
+       /* Check if the boot loader actually provided a tag table */
+       lddpc   r0, magic_number
+       cp.w    r12, r0
+       brne    no_tag_table
+
        /*
         * Save the tag table address for later use. This must be done
         * _after_ .bss has been initialized...
@@ -53,8 +69,15 @@ bss_start_addr:
        .long   __bss_start
 end_addr:
        .long   _end
+init_sr:
+       .long   0x007f0000      /* Supervisor mode, everything masked */
+stack_addr:
+       .long   init_thread_union
+panic_addr:
+       .long   panic
 
 no_tag_table:
        sub     r12, pc, (. - 2f)
-       bral    panic
+       /* jump via lddpc since panic() may be too far away for a bral */
+       lddpc   pc, panic_addr
 2:     .asciz  "Boot loader didn't provide correct magic number\n"
index 996cb656474e267920ad6856f6445156349b0579..45f563ed73fd51e6d4b0590a56d0f7fa3e5f4c48 100644 (file)
@@ -16,6 +16,7 @@
 typedef u16    kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION 0xd673  /* breakpoint */
 #define MAX_INSN_SIZE          2
+#define MAX_STACK_SIZE         64      /* 32 would probably be OK */
 
 #define kretprobe_blacklist_size 0
 
@@ -26,6 +27,19 @@ struct arch_specific_insn {
        kprobe_opcode_t insn[MAX_INSN_SIZE];
 };
 
+struct prev_kprobe {
+       struct kprobe *kp;
+       unsigned int status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+       unsigned int kprobe_status;
+       struct prev_kprobe prev_kprobe;
+       struct pt_regs jprobe_saved_regs;
+       char jprobes_stack[MAX_STACK_SIZE];
+};
+
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
                                    unsigned long val, void *data);
index 3b85eaddf525f2be65d5772be3f28051afac7b43..08d8a3d76ea8628b6d749370c4edec3efb5b578c 100644 (file)
@@ -2,35 +2,35 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 header-y += auxvec.h
-header-y += bitsperlong.h
 header-y += byteorder.h
 header-y += cachectl.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm_para.h
-header-y += mman.h
 header-y += msgbuf.h
 header-y += param.h
-header-y += poll.h
 header-y += posix_types.h
 header-y += ptrace.h
-header-y += resource.h
 header-y += sembuf.h
 header-y += setup.h
 header-y += shmbuf.h
 header-y += sigcontext.h
-header-y += siginfo.h
 header-y += signal.h
 header-y += socket.h
 header-y += sockios.h
 header-y += stat.h
-header-y += statfs.h
 header-y += swab.h
 header-y += termbits.h
 header-y += termios.h
 header-y += types.h
 header-y += unistd.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
 generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += siginfo.h
+generic-y += statfs.h
index d5dd435bf8f4769cc5d20184eca9db07c81f64d1..4f02da3ffefa63d61bcaa8de9a6429a819c800c2 100644 (file)
@@ -1,4 +1,4 @@
-#ifndef __ASM_AVR32_AUXVEC_H
-#define __ASM_AVR32_AUXVEC_H
+#ifndef _UAPI__ASM_AVR32_AUXVEC_H
+#define _UAPI__ASM_AVR32_AUXVEC_H
 
-#endif /* __ASM_AVR32_AUXVEC_H */
+#endif /* _UAPI__ASM_AVR32_AUXVEC_H */
diff --git a/arch/avr32/include/uapi/asm/bitsperlong.h b/arch/avr32/include/uapi/asm/bitsperlong.h
deleted file mode 100644 (file)
index 6dc0bb0..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitsperlong.h>
index 50abc21619a8a288b9c46a2ac0d38ca56ce344e7..71242f0d39c6d7ef2caed95524356e159cd5796c 100644 (file)
@@ -1,9 +1,9 @@
 /*
  * AVR32 endian-conversion functions.
  */
-#ifndef __ASM_AVR32_BYTEORDER_H
-#define __ASM_AVR32_BYTEORDER_H
+#ifndef _UAPI__ASM_AVR32_BYTEORDER_H
+#define _UAPI__ASM_AVR32_BYTEORDER_H
 
 #include <linux/byteorder/big_endian.h>
 
-#endif /* __ASM_AVR32_BYTEORDER_H */
+#endif /* _UAPI__ASM_AVR32_BYTEORDER_H */
index 4faf1ce600616d82108523b157d065baa6cdeee1..573a9584dd57eafeefdb18754d0e0df7a9b65f77 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_CACHECTL_H
-#define __ASM_AVR32_CACHECTL_H
+#ifndef _UAPI__ASM_AVR32_CACHECTL_H
+#define _UAPI__ASM_AVR32_CACHECTL_H
 
 /*
  * Operations that can be performed through the cacheflush system call
@@ -8,4 +8,4 @@
 /* Clean the data cache, then invalidate the icache */
 #define CACHE_IFLUSH   0
 
-#endif /* __ASM_AVR32_CACHECTL_H */
+#endif /* _UAPI__ASM_AVR32_CACHECTL_H */
diff --git a/arch/avr32/include/uapi/asm/errno.h b/arch/avr32/include/uapi/asm/errno.h
deleted file mode 100644 (file)
index 558a724..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_ERRNO_H
-#define __ASM_AVR32_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif /* __ASM_AVR32_ERRNO_H */
diff --git a/arch/avr32/include/uapi/asm/fcntl.h b/arch/avr32/include/uapi/asm/fcntl.h
deleted file mode 100644 (file)
index 14c0c44..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_FCNTL_H
-#define __ASM_AVR32_FCNTL_H
-
-#include <asm-generic/fcntl.h>
-
-#endif /* __ASM_AVR32_FCNTL_H */
diff --git a/arch/avr32/include/uapi/asm/ioctl.h b/arch/avr32/include/uapi/asm/ioctl.h
deleted file mode 100644 (file)
index c8472c1..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_IOCTL_H
-#define __ASM_AVR32_IOCTL_H
-
-#include <asm-generic/ioctl.h>
-
-#endif /* __ASM_AVR32_IOCTL_H */
diff --git a/arch/avr32/include/uapi/asm/ioctls.h b/arch/avr32/include/uapi/asm/ioctls.h
deleted file mode 100644 (file)
index 909cf66..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_IOCTLS_H
-#define __ASM_AVR32_IOCTLS_H
-
-#include <asm-generic/ioctls.h>
-
-#endif /* __ASM_AVR32_IOCTLS_H */
diff --git a/arch/avr32/include/uapi/asm/ipcbuf.h b/arch/avr32/include/uapi/asm/ipcbuf.h
deleted file mode 100644 (file)
index 84c7e51..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/avr32/include/uapi/asm/kvm_para.h b/arch/avr32/include/uapi/asm/kvm_para.h
deleted file mode 100644 (file)
index 14fab8f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kvm_para.h>
diff --git a/arch/avr32/include/uapi/asm/mman.h b/arch/avr32/include/uapi/asm/mman.h
deleted file mode 100644 (file)
index 8eebf89..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mman.h>
index ac18bc4da7f7acfe154ca532093ea5712fe714ad..9eae6effad14029c1f128277eb8c0eec89fe2be5 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_MSGBUF_H
-#define __ASM_AVR32_MSGBUF_H
+#ifndef _UAPI__ASM_AVR32_MSGBUF_H
+#define _UAPI__ASM_AVR32_MSGBUF_H
 
 /*
  * The msqid64_ds structure for i386 architecture.
@@ -28,4 +28,4 @@ struct msqid64_ds {
        unsigned long  __unused5;
 };
 
-#endif /* __ASM_AVR32_MSGBUF_H */
+#endif /* _UAPI__ASM_AVR32_MSGBUF_H */
diff --git a/arch/avr32/include/uapi/asm/poll.h b/arch/avr32/include/uapi/asm/poll.h
deleted file mode 100644 (file)
index c98509d..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
index 9ba9e749b3f34d7c2760d1d9784ff9ede9528ef3..5b813a8abf0946645d00979dc1bcd997f43362bd 100644 (file)
@@ -5,8 +5,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#ifndef __ASM_AVR32_POSIX_TYPES_H
-#define __ASM_AVR32_POSIX_TYPES_H
+#ifndef _UAPI__ASM_AVR32_POSIX_TYPES_H
+#define _UAPI__ASM_AVR32_POSIX_TYPES_H
 
 /*
  * This file is generally used by user-level software, so you need to
@@ -34,4 +34,4 @@ typedef unsigned short  __kernel_old_dev_t;
 
 #include <asm-generic/posix_types.h>
 
-#endif /* __ASM_AVR32_POSIX_TYPES_H */
+#endif /* _UAPI__ASM_AVR32_POSIX_TYPES_H */
diff --git a/arch/avr32/include/uapi/asm/resource.h b/arch/avr32/include/uapi/asm/resource.h
deleted file mode 100644 (file)
index c6dd101..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_RESOURCE_H
-#define __ASM_AVR32_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif /* __ASM_AVR32_RESOURCE_H */
index e472216e0c9717a2bd577775f9c686251c9688b9..6c6f7cf1e75ac99ce9f849d6a2343904cc2996d5 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SEMBUF_H
-#define __ASM_AVR32_SEMBUF_H
+#ifndef _UAPI__ASM_AVR32_SEMBUF_H
+#define _UAPI__ASM_AVR32_SEMBUF_H
 
 /*
 * The semid64_ds structure for AVR32 architecture.
@@ -22,4 +22,4 @@ struct semid64_ds {
         unsigned long   __unused4;
 };
 
-#endif /* __ASM_AVR32_SEMBUF_H */
+#endif /* _UAPI__ASM_AVR32_SEMBUF_H */
index e58aa9356fafc080e311ac9a080ac05af49fb9e6..a654df7dba462c68de298a3d7b3741b0a5855cf5 100644 (file)
@@ -13,5 +13,4 @@
 
 #define COMMAND_LINE_SIZE 256
 
-
 #endif /* _UAPI__ASM_AVR32_SETUP_H__ */
index c62fba41739aff13c647044b8c1017a7c6307614..b94cf8b60b73df0567125f2877aed7dbd7308be9 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SHMBUF_H
-#define __ASM_AVR32_SHMBUF_H
+#ifndef _UAPI__ASM_AVR32_SHMBUF_H
+#define _UAPI__ASM_AVR32_SHMBUF_H
 
 /*
  * The shmid64_ds structure for i386 architecture.
@@ -39,4 +39,4 @@ struct shminfo64 {
        unsigned long   __unused4;
 };
 
-#endif /* __ASM_AVR32_SHMBUF_H */
+#endif /* _UAPI__ASM_AVR32_SHMBUF_H */
index e04062b5f39fe421045ae6b6a4f5bd13920ca38d..27e56bf6377f6c00647b0ae5a0ff4c2cfce20542 100644 (file)
@@ -5,8 +5,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#ifndef __ASM_AVR32_SIGCONTEXT_H
-#define __ASM_AVR32_SIGCONTEXT_H
+#ifndef _UAPI__ASM_AVR32_SIGCONTEXT_H
+#define _UAPI__ASM_AVR32_SIGCONTEXT_H
 
 struct sigcontext {
        unsigned long   oldmask;
@@ -31,4 +31,4 @@ struct sigcontext {
        unsigned long   r0;
 };
 
-#endif /* __ASM_AVR32_SIGCONTEXT_H */
+#endif /* _UAPI__ASM_AVR32_SIGCONTEXT_H */
diff --git a/arch/avr32/include/uapi/asm/siginfo.h b/arch/avr32/include/uapi/asm/siginfo.h
deleted file mode 100644 (file)
index 5ee93f4..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _AVR32_SIGINFO_H
-#define _AVR32_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-#endif
index 1b77a93eff500c14ac9594c9f6098007353c8a78..ffe8c770cafd86efe310fe9882210691fcf44c44 100644 (file)
@@ -118,5 +118,4 @@ typedef struct sigaltstack {
        size_t ss_size;
 } stack_t;
 
-
 #endif /* _UAPI__ASM_AVR32_SIGNAL_H */
index 4399364214349674999c872ca4779c7089c22160..cbf902e4cd9e9ef1528e48a24f3d56c3fc2d889d 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SOCKET_H
-#define __ASM_AVR32_SOCKET_H
+#ifndef _UAPI__ASM_AVR32_SOCKET_H
+#define _UAPI__ASM_AVR32_SOCKET_H
 
 #include <asm/sockios.h>
 
@@ -78,4 +78,4 @@
 
 #define SO_MAX_PACING_RATE     47
 
-#endif /* __ASM_AVR32_SOCKET_H */
+#endif /* _UAPI__ASM_AVR32_SOCKET_H */
index 0802d742f97d79138bbc5b4400f2fe1c145dece5..d04785453532a51fcfa0d3b654e875e18f1bfb30 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_SOCKIOS_H
-#define __ASM_AVR32_SOCKIOS_H
+#ifndef _UAPI__ASM_AVR32_SOCKIOS_H
+#define _UAPI__ASM_AVR32_SOCKIOS_H
 
 /* Socket-level I/O control calls. */
 #define FIOSETOWN      0x8901
@@ -10,4 +10,4 @@
 #define SIOCGSTAMP     0x8906          /* Get stamp (timeval) */
 #define SIOCGSTAMPNS   0x8907          /* Get stamp (timespec) */
 
-#endif /* __ASM_AVR32_SOCKIOS_H */
+#endif /* _UAPI__ASM_AVR32_SOCKIOS_H */
index e72881e10230506e5adba38057738680234892e1..c06acef7fce7f42d6204d5d97a1c7a42100409a6 100644 (file)
@@ -5,8 +5,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-#ifndef __ASM_AVR32_STAT_H
-#define __ASM_AVR32_STAT_H
+#ifndef _UAPI__ASM_AVR32_STAT_H
+#define _UAPI__ASM_AVR32_STAT_H
 
 struct __old_kernel_stat {
         unsigned short st_dev;
@@ -76,4 +76,4 @@ struct stat64 {
        unsigned long   __unused2;
 };
 
-#endif /* __ASM_AVR32_STAT_H */
+#endif /* _UAPI__ASM_AVR32_STAT_H */
diff --git a/arch/avr32/include/uapi/asm/statfs.h b/arch/avr32/include/uapi/asm/statfs.h
deleted file mode 100644 (file)
index 2961bd1..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_STATFS_H
-#define __ASM_AVR32_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif /* __ASM_AVR32_STATFS_H */
index 14cc737bbca6e909dbd7ec44d36703731f8ca519..1a03549e7dc5ea9ef3818194cd0a5a319d90eddc 100644 (file)
@@ -1,8 +1,8 @@
 /*
  * AVR32 byteswapping functions.
  */
-#ifndef __ASM_AVR32_SWAB_H
-#define __ASM_AVR32_SWAB_H
+#ifndef _UAPI__ASM_AVR32_SWAB_H
+#define _UAPI__ASM_AVR32_SWAB_H
 
 #include <linux/types.h>
 #include <linux/compiler.h>
@@ -32,4 +32,4 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
 #define __arch_swab32 __arch_swab32
 #endif
 
-#endif /* __ASM_AVR32_SWAB_H */
+#endif /* _UAPI__ASM_AVR32_SWAB_H */
index 366adc5ebb100db9924f584568327db7ab70672c..32789ccb38f8434cfd494dbaed6088eb304156de 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef __ASM_AVR32_TERMBITS_H
-#define __ASM_AVR32_TERMBITS_H
+#ifndef _UAPI__ASM_AVR32_TERMBITS_H
+#define _UAPI__ASM_AVR32_TERMBITS_H
 
 #include <linux/posix_types.h>
 
@@ -193,4 +193,4 @@ struct ktermios {
 #define        TCSADRAIN       1
 #define        TCSAFLUSH       2
 
-#endif /* __ASM_AVR32_TERMBITS_H */
+#endif /* _UAPI__ASM_AVR32_TERMBITS_H */
index b8ef8ea6335284bac7281f518410b2c568c9a632..c8a0081556c4da3b8dfb4041392fb1bb95591f40 100644 (file)
@@ -46,5 +46,4 @@ struct termio {
 
 /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
 
-
 #endif /* _UAPI__ASM_AVR32_TERMIOS_H */
index bb34ad349dfc1a12d35d1e48066baa71f4c4bee2..7c986c4e99b55cf3727ceb68eeb78841c8d99b11 100644 (file)
@@ -5,4 +5,9 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#ifndef _UAPI__ASM_AVR32_TYPES_H
+#define _UAPI__ASM_AVR32_TYPES_H
+
 #include <asm-generic/int-ll64.h>
+
+#endif /* _UAPI__ASM_AVR32_TYPES_H */
index 3eaa68753adba04cd4382424b4b8eeef9a1b440d..8822bf46ddc683758ea9c6be98d0f15d995c9394 100644 (file)
 #define __NR_eventfd           281
 #define __NR_setns             283
 
-
 #endif /* _UAPI__ASM_AVR32_UNISTD_H */
index 9899d3cc6f03b109f352faa9ac51387f96497cf7..7301f4806bbede59cf5eaa48649802ec75cddacf 100644 (file)
@@ -401,9 +401,10 @@ handle_critical:
        /* We should never get here... */
 bad_return:
        sub     r12, pc, (. - 1f)
-       bral    panic
+       lddpc   pc, 2f
        .align  2
 1:     .asciz  "Return from critical exception!"
+2:     .long   panic
 
        .align  1
 do_bus_error_write:
index 6163bd0acb958ae3a016ec125a574764fec48c12..59eae6dfbed2b5f1167864bf6311b0b231da9a10 100644 (file)
 #include <linux/linkage.h>
 
 #include <asm/page.h>
-#include <asm/thread_info.h>
-#include <asm/sysreg.h>
 
        .section .init.text,"ax"
        .global kernel_entry
 kernel_entry:
-       /* Initialize status register */
-       lddpc   r0, init_sr
-       mtsr    SYSREG_SR, r0
-
-       /* Set initial stack pointer */
-       lddpc   sp, stack_addr
-       sub     sp, -THREAD_SIZE
-
-#ifdef CONFIG_FRAME_POINTER
-       /* Mark last stack frame */
-       mov     lr, 0
-       mov     r7, 0
-#endif
-
        /* Start the show */
        lddpc   pc, kernel_start_addr
 
        .align  2
-init_sr:
-       .long   0x007f0000      /* Supervisor mode, everything masked */
-stack_addr:
-       .long   init_thread_union
 kernel_start_addr:
        .long   start_kernel
index d43daf192b21d54df034e52c809a4ac4b2178161..4c530a82fc469f976b8a38a341f077a43f9c9998 100644 (file)
@@ -1992,7 +1992,7 @@ sba_connect_bus(struct pci_bus *bus)
        if (PCI_CONTROLLER(bus)->iommu)
                return;
 
-       handle = PCI_CONTROLLER(bus)->acpi_handle;
+       handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
        if (!handle)
                return;
 
index 80775f55f03f9293c2d05d2598e349f08ef9b56e..71fbaaa495ccc18af5f33cdaef92c0ab228dad8e 100644 (file)
@@ -95,7 +95,7 @@ struct iospace_resource {
 };
 
 struct pci_controller {
-       void *acpi_handle;
+       struct acpi_device *companion;
        void *iommu;
        int segment;
        int node;               /* nearest node with memory or -1 for global allocation */
index 5a9ff1c3c3e912c5d0435ea86900a664e525fa16..cb592773c78b1ef1f86faa4a4190c507c4e9fdbf 100644 (file)
@@ -2166,12 +2166,6 @@ static const struct file_operations pfm_file_ops = {
        .flush          = pfm_flush
 };
 
-static int
-pfmfs_delete_dentry(const struct dentry *dentry)
-{
-       return 1;
-}
-
 static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
        return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
@@ -2179,7 +2173,7 @@ static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
 }
 
 static const struct dentry_operations pfmfs_dentry_operations = {
-       .d_delete = pfmfs_delete_dentry,
+       .d_delete = always_delete_dentry,
        .d_dname = pfmfs_dname,
 };
 
index 2326790b7d8be4f9e6cffbea4c4a22fc1ab91f3e..9e4938d8ca4d297e331131a79d8458d97a27d256 100644 (file)
@@ -436,9 +436,9 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
        if (!controller)
                return NULL;
 
-       controller->acpi_handle = device->handle;
+       controller->companion = device;
 
-       pxm = acpi_get_pxm(controller->acpi_handle);
+       pxm = acpi_get_pxm(device->handle);
 #ifdef CONFIG_NUMA
        if (pxm >= 0)
                controller->node = pxm_to_node(pxm);
@@ -489,7 +489,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
        struct pci_controller *controller = bridge->bus->sysdata;
 
-       ACPI_HANDLE_SET(&bridge->dev, controller->acpi_handle);
+       ACPI_COMPANION_SET(&bridge->dev, controller->companion);
        return 0;
 }
 
index b1725398b5af49683622765652104a85c3d95228..0640739cc20cf2895fca507babfe7de9096d8a41 100644 (file)
@@ -132,7 +132,7 @@ sn_get_bussoft_ptr(struct pci_bus *bus)
        struct acpi_resource_vendor_typed *vendor;
 
 
-       handle = PCI_CONTROLLER(bus)->acpi_handle;
+       handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
        status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
                                          &sn_uuid, &buffer);
        if (ACPI_FAILURE(status)) {
@@ -360,7 +360,7 @@ sn_acpi_get_pcidev_info(struct pci_dev *dev, struct pcidev_info **pcidev_info,
        acpi_status status;
        struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 
-       rootbus_handle = PCI_CONTROLLER(dev)->acpi_handle;
+       rootbus_handle = acpi_device_handle(PCI_CONTROLLER(dev)->companion);
         status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL,
                                        &segment);
         if (ACPI_SUCCESS(status)) {
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
new file mode 100644 (file)
index 0000000..748016c
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <uapi/asm/socket.h>
+
+/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
+ * have to define SOCK_NONBLOCK to a different value here.
+ */
+#define SOCK_NONBLOCK  0x40000000
+
+#endif /* _ASM_SOCKET_H */
index 63f4dd0b49c29c758807b68bbf9b7bf9b71911c3..4006964d8e12646761d954b9f73ff0b503e736b6 100644 (file)
@@ -4,14 +4,11 @@
 /*
  * User space memory access functions
  */
-#include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
 #include <asm-generic/uaccess-unaligned.h>
 
-#include <linux/sched.h>
-
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
 
@@ -36,43 +33,12 @@ extern int __get_user_bad(void);
 extern int __put_kernel_bad(void);
 extern int __put_user_bad(void);
 
-
-/*
- * Test whether a block of memory is a valid user space address.
- * Returns 0 if the range is valid, nonzero otherwise.
- */
-static inline int __range_not_ok(unsigned long addr, unsigned long size,
-                                unsigned long limit)
+static inline long access_ok(int type, const void __user * addr,
+               unsigned long size)
 {
-       unsigned long __newaddr = addr + size;
-       return (__newaddr < addr || __newaddr > limit || size > limit);
+       return 1;
 }
 
-/**
- * access_ok: - Checks if a user space pointer is valid
- * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
- *        %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
- *        to write to a block, it is always safe to read from it.
- * @addr: User space pointer to start of block to check
- * @size: Size of block to check
- *
- * Context: User context only.  This function may sleep.
- *
- * Checks if a pointer to a block of memory in user space is valid.
- *
- * Returns true (nonzero) if the memory block may be valid, false (zero)
- * if it is definitely invalid.
- *
- * Note that, depending on architecture, this function probably just
- * checks that the pointer is in the user space range - after calling
- * this function, memory access functions may still return -EFAULT.
- */
-#define access_ok(type, addr, size)                                    \
-(      __chk_user_ptr(addr),                                           \
-       !__range_not_ok((unsigned long) (__force void *) (addr),        \
-                       size, user_addr_max())                          \
-)
-
 #define put_user __put_user
 #define get_user __get_user
 
@@ -253,11 +219,7 @@ extern long lstrnlen_user(const char __user *,long);
 /*
  * Complex access routines -- macros
  */
-#ifdef CONFIG_COMPAT
-#define user_addr_max() (TASK_SIZE)
-#else
-#define user_addr_max() (DEFAULT_TASK_SIZE)
-#endif
+#define user_addr_max() (~0UL)
 
 #define strnlen_user lstrnlen_user
 #define strlen_user(str) lstrnlen_user(str, 0x7fffffffL)
index 7c614d01f1fa42df36a23b3633f6d6ae7d47aa6a..f33113a6141e7540da2195cc72469152edfbecf2 100644 (file)
@@ -1,5 +1,5 @@
-#ifndef _ASM_SOCKET_H
-#define _ASM_SOCKET_H
+#ifndef _UAPI_ASM_SOCKET_H
+#define _UAPI_ASM_SOCKET_H
 
 #include <asm/sockios.h>
 
@@ -77,9 +77,4 @@
 
 #define SO_MAX_PACING_RATE     0x4048
 
-/* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
- * have to define SOCK_NONBLOCK to a different value here.
- */
-#define SOCK_NONBLOCK   0x40000000
-
-#endif /* _ASM_SOCKET_H */
+#endif /* _UAPI_ASM_SOCKET_H */
index b5507ec06b846f09ed4d38c5841b4eecaffb156e..413dc1769299685f00289193ca301ba7e2c0839d 100644 (file)
@@ -161,7 +161,7 @@ static inline void prefetch_dst(const void *addr)
 /* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
  * per loop.  This code is derived from glibc. 
  */
-static inline unsigned long copy_dstaligned(unsigned long dst,
+static noinline unsigned long copy_dstaligned(unsigned long dst,
                                        unsigned long src, unsigned long len)
 {
        /* gcc complains that a2 and a3 may be uninitialized, but actually
@@ -276,7 +276,7 @@ handle_store_error:
 /* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
  * In case of an access fault the faulty address can be read from the per_cpu
  * exception data struct. */
-static unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
+static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
                                        unsigned long len)
 {
        register unsigned long src, dst, t1, t2, t3;
@@ -529,7 +529,7 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
 {
        unsigned long addr = (unsigned long)src;
 
-       if (size < 0 || addr < PAGE_SIZE)
+       if (addr < PAGE_SIZE)
                return -EFAULT;
 
        /* check for I/O space F_EXTEND(0xfff00000) access as well? */
index 7584a5df0fa4a76f5e2815fac25a38436a587955..9d08c71a967ed2e1e86189272369f4b794b8453c 100644 (file)
@@ -282,16 +282,34 @@ bad_area:
 #endif
                switch (code) {
                case 15:        /* Data TLB miss fault/Data page fault */
+                       /* send SIGSEGV when outside of vma */
+                       if (!vma ||
+                           address < vma->vm_start || address > vma->vm_end) {
+                               si.si_signo = SIGSEGV;
+                               si.si_code = SEGV_MAPERR;
+                               break;
+                       }
+
+                       /* send SIGSEGV for wrong permissions */
+                       if ((vma->vm_flags & acc_type) != acc_type) {
+                               si.si_signo = SIGSEGV;
+                               si.si_code = SEGV_ACCERR;
+                               break;
+                       }
+
+                       /* probably address is outside of mapped file */
+                       /* fall through */
                case 17:        /* NA data TLB miss / page fault */
                case 18:        /* Unaligned access - PCXS only */
                        si.si_signo = SIGBUS;
-                       si.si_code = BUS_ADRERR;
+                       si.si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR;
                        break;
                case 16:        /* Non-access instruction TLB miss fault */
                case 26:        /* PCXL: Data memory access rights trap */
                default:
                        si.si_signo = SIGSEGV;
-                       si.si_code = SEGV_MAPERR;
+                       si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
+                       break;
                }
                si.si_errno = 0;
                si.si_addr = (void __user *) address;
index 607acf54a425b2b50913ea6b4f48024a5d21aadc..8a2463670a5b8107243f313c7e272339570ce301 100644 (file)
@@ -111,6 +111,7 @@ endif
 endif
 
 CFLAGS-$(CONFIG_PPC64) := -mtraceback=no -mcall-aixdesc
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,-mminimal-toc)
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
 CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD)
index 4c617bf8cdb24af8f3a202d40c4cc98b776768fd..4f6e48277c46170bb806bbb17e41c2887de4054f 100644 (file)
                reg = <0xe2000 0x1000>;
        };
 
-/include/ "qoriq-dma-0.dtsi"
+/include/ "elo3-dma-0.dtsi"
        dma@100300 {
                fsl,iommu-parent = <&pamu0>;
                fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
        };
 
-/include/ "qoriq-dma-1.dtsi"
+/include/ "elo3-dma-1.dtsi"
        dma@101300 {
                fsl,iommu-parent = <&pamu0>;
                fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
new file mode 100644 (file)
index 0000000..3c210e0
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x100000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma0: dma@100300 {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "fsl,elo3-dma";
+       reg = <0x100300 0x4>,
+             <0x100600 0x4>;
+       ranges = <0x0 0x100100 0x500>;
+       dma-channel@0 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x0 0x80>;
+               interrupts = <28 2 0 0>;
+       };
+       dma-channel@80 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x80 0x80>;
+               interrupts = <29 2 0 0>;
+       };
+       dma-channel@100 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x100 0x80>;
+               interrupts = <30 2 0 0>;
+       };
+       dma-channel@180 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x180 0x80>;
+               interrupts = <31 2 0 0>;
+       };
+       dma-channel@300 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x300 0x80>;
+               interrupts = <76 2 0 0>;
+       };
+       dma-channel@380 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x380 0x80>;
+               interrupts = <77 2 0 0>;
+       };
+       dma-channel@400 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x400 0x80>;
+               interrupts = <78 2 0 0>;
+       };
+       dma-channel@480 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x480 0x80>;
+               interrupts = <79 2 0 0>;
+       };
+};
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
new file mode 100644 (file)
index 0000000..cccf3bb
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x101000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma1: dma@101300 {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "fsl,elo3-dma";
+       reg = <0x101300 0x4>,
+             <0x101600 0x4>;
+       ranges = <0x0 0x101100 0x500>;
+       dma-channel@0 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x0 0x80>;
+               interrupts = <32 2 0 0>;
+       };
+       dma-channel@80 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x80 0x80>;
+               interrupts = <33 2 0 0>;
+       };
+       dma-channel@100 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x100 0x80>;
+               interrupts = <34 2 0 0>;
+       };
+       dma-channel@180 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x180 0x80>;
+               interrupts = <35 2 0 0>;
+       };
+       dma-channel@300 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x300 0x80>;
+               interrupts = <80 2 0 0>;
+       };
+       dma-channel@380 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x380 0x80>;
+               interrupts = <81 2 0 0>;
+       };
+       dma-channel@400 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x400 0x80>;
+               interrupts = <82 2 0 0>;
+       };
+       dma-channel@480 {
+               compatible = "fsl,eloplus-dma-channel";
+               reg = <0x480 0x80>;
+               interrupts = <83 2 0 0>;
+       };
+};
index 510afa362de141465b50aada0cf0e7c9ceb83536..4143a9733cd01e0ad62d9d3d98902e7f414dd8f3 100644 (file)
                reg        = <0xea000 0x4000>;
        };
 
-/include/ "qoriq-dma-0.dtsi"
-/include/ "qoriq-dma-1.dtsi"
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
 
 /include/ "qoriq-espi-0.dtsi"
        spi@110000 {
diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig
new file mode 100644 (file)
index 0000000..62771e0
--- /dev/null
@@ -0,0 +1,352 @@
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_AUDITSYSCALL=y
+CONFIG_IRQ_DOMAIN_DEBUG=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PPC_SPLPAR=y
+CONFIG_SCANLOG=m
+CONFIG_PPC_SMLPAR=y
+CONFIG_DTL=y
+# CONFIG_PPC_PMAC is not set
+CONFIG_RTAS_FLASH=m
+CONFIG_IBMEBUS=y
+CONFIG_HZ_100=y
+CONFIG_BINFMT_MISC=m
+CONFIG_PPC_TRANSACTIONAL_MEM=y
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_CMA=y
+CONFIG_PPC_64K_PAGES=y
+CONFIG_PPC_SUBPAGE_PROT=y
+CONFIG_SCHED_SMT=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_RPA=m
+CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_BLK_DEV_FD=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_IBMVSCSI=y
+CONFIG_SCSI_IBMVFC=m
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_IPR=y
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_UEVENT=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_TUN=m
+CONFIG_VIRTIO_NET=m
+CONFIG_VORTEX=y
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+CONFIG_PCNET32=y
+CONFIG_TIGON3=y
+CONFIG_CHELSIO_T1=m
+CONFIG_BE2NET=m
+CONFIG_S2IO=m
+CONFIG_IBMVETH=y
+CONFIG_EHEA=y
+CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_IXGB=m
+CONFIG_IXGBE=m
+CONFIG_MLX4_EN=m
+CONFIG_MYRI10GE=m
+CONFIG_QLGE=m
+CONFIG_NETXEN_NIC=m
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_EVDEV=m
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_PCSPKR=m
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_ICOM=m
+CONFIG_SERIAL_JSM=m
+CONFIG_HVC_CONSOLE=y
+CONFIG_HVC_RTAS=y
+CONFIG_HVCS=m
+CONFIG_VIRTIO_CONSOLE=m
+CONFIG_IBM_BSR=m
+CONFIG_GEN_RTC=y
+CONFIG_RAW_DRIVER=y
+CONFIG_MAX_RAW_DEVS=1024
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_OF=y
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+CONFIG_FB_RADEON=y
+CONFIG_FB_IBM_GXT4500=y
+CONFIG_LCD_PLATFORM=m
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=y
+CONFIG_USB_MON=m
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=m
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+CONFIG_INFINIBAND_EHCA=m
+CONFIG_INFINIBAND_CXGB3=m
+CONFIG_INFINIBAND_CXGB4=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_INFINIBAND_IPOIB=m
+CONFIG_INFINIBAND_IPOIB_CM=y
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_ISER=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_REISERFS_FS=y
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PSTORE=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_CRC_T10DIF=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_CODE_PATCHING_SELFTEST=y
+CONFIG_FTR_FIXUP_SELFTEST=y
+CONFIG_MSI_BITMAP_SELFTEST=y
+CONFIG_XMON=y
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_LZO=m
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
index cc0655a702a739236d29168293478153385ecfc6..935b5e7a1436dbbead45f9e578f75a024de0366f 100644 (file)
@@ -31,6 +31,8 @@
 extern unsigned long randomize_et_dyn(unsigned long base);
 #define ELF_ET_DYN_BASE                (randomize_et_dyn(0x20000000))
 
+#define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
+
 /*
  * Our registers are always unsigned longs, whether we're a 32 bit
  * process or 64 bit, on either a 64 bit or 32 bit kernel.
@@ -86,6 +88,8 @@ typedef elf_vrregset_t elf_fpxregset_t;
 #ifdef __powerpc64__
 # define SET_PERSONALITY(ex)                                   \
 do {                                                           \
+       if (((ex).e_flags & 0x3) == 2)                          \
+               set_thread_flag(TIF_ELF2ABI);                   \
        if ((ex).e_ident[EI_CLASS] == ELFCLASS32)               \
                set_thread_flag(TIF_32BIT);                     \
        else                                                    \
index 0c7f2bfcf1348100fb10c4cd6f74dcee34e42756..d8b600b3f058bda7e0358931b885b8c5d4f30215 100644 (file)
@@ -403,6 +403,8 @@ static inline unsigned long cmo_get_page_size(void)
 extern long pSeries_enable_reloc_on_exc(void);
 extern long pSeries_disable_reloc_on_exc(void);
 
+extern long pseries_big_endian_exceptions(void);
+
 #else
 
 #define pSeries_enable_reloc_on_exc()  do {} while (0)
index a63b045e707ce8a5d34c91e8fa698808a8ee4647..12c32c5f533d924428714301f7482493df00e3c8 100644 (file)
@@ -287,6 +287,32 @@ static inline long disable_reloc_on_exceptions(void) {
        return plpar_set_mode(0, 3, 0, 0);
 }
 
+/*
+ * Switch this partition to taking its exceptions in big endian mode.
+ * The call has a partition wide scope and can take a while to complete: if
+ * it returns H_LONG_BUSY_*, retry it periodically until it returns H_SUCCESS.
+ */
+static inline long enable_big_endian_exceptions(void)
+{
+       /* an mflags value of 0 selects big endian exceptions */
+       const unsigned long mflags = 0;
+
+       return plpar_set_mode(mflags, 4, 0, 0);
+}
+
+/*
+ * Switch this partition to taking its exceptions in little endian mode.
+ * The call has a partition wide scope and can take a while to complete: if
+ * it returns H_LONG_BUSY_*, retry it periodically until it returns H_SUCCESS.
+ */
+static inline long enable_little_endian_exceptions(void)
+{
+       /* an mflags value of 1 selects little endian exceptions */
+       const unsigned long mflags = 1;
+
+       return plpar_set_mode(mflags, 4, 0, 0);
+}
+
 static inline long plapr_set_ciabr(unsigned long ciabr)
 {
        return plpar_set_mode(0, 1, ciabr, 0);
index 98da78e0c2c0eeda19f1852faa725742aa43af76..084e0807db988e2a24b836df800406739f91a7e9 100644 (file)
@@ -33,6 +33,7 @@ extern int boot_cpuid;
 extern int spinning_secondaries;
 
 extern void cpu_die(void);
+extern int cpu_to_chip_id(int cpu);
 
 #ifdef CONFIG_SMP
 
@@ -112,7 +113,6 @@ static inline struct cpumask *cpu_core_mask(int cpu)
 }
 
 extern int cpu_to_core_id(int cpu);
-extern int cpu_to_chip_id(int cpu);
 
 /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
  *
index 8fd6cf6dcee854c8244010ce12d107abb0fa81d7..9854c564ac525b02b9f81c363a1cd4477cd76d4b 100644 (file)
@@ -105,6 +105,9 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_EMULATE_STACK_STORE        16      /* Is an instruction emulation
                                                for stack store? */
 #define TIF_MEMDIE             17      /* is terminating due to OOM killer */
+#if defined(CONFIG_PPC64)
+#define TIF_ELF2ABI            18      /* function descriptors must die! */
+#endif
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
@@ -183,6 +186,12 @@ static inline bool test_thread_local_flags(unsigned int flags)
 #define is_32bit_task()        (1)
 #endif
 
+#if defined(CONFIG_PPC64)
+#define is_elf2_task() (test_thread_flag(TIF_ELF2ABI))
+#else
+#define is_elf2_task() (0)
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
index 67130206534717b285fb50c77d38e4557f053eae..4bd687d5e7aa3f80e1b33912b5451bd34a3187b1 100644 (file)
@@ -686,6 +686,15 @@ void eeh_save_bars(struct eeh_dev *edev)
 
        for (i = 0; i < 16; i++)
                eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
+
+       /*
+        * For PCI bridges, including the root port, we need to enable
+        * bus mastering explicitly. Otherwise, it can't fetch IODA table
+        * entries correctly. So we cache the bit in advance so that
+        * we can restore it after reset, either PHB range or PE range.
+        */
+       if (edev->mode & EEH_DEV_BRIDGE)
+               edev->config_space[1] |= PCI_COMMAND_MASTER;
 }
 
 /**
index d27c5afc90aecfbe41506814d3a0c3891bdacc4b..72d748b56c86b2b9ae960e49b819dfedef36f61a 100644 (file)
@@ -74,8 +74,13 @@ static int eeh_event_handler(void * dummy)
                pe = event->pe;
                if (pe) {
                        eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
-                       pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
-                                pe->phb->global_number, pe->addr);
+                       if (pe->type & EEH_PE_PHB)
+                               pr_info("EEH: Detected error on PHB#%d\n",
+                                        pe->phb->global_number);
+                       else
+                               pr_info("EEH: Detected PCI bus error on "
+                                       "PHB#%d-PE#%x\n",
+                                       pe->phb->global_number, pe->addr);
                        eeh_handle_event(pe);
                        eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
                } else {
index 75c2d1009985eb8dbd9f3995fc900f9f21227d01..3386d8ab7eb0607b3c9d6f03e68824d4abe4bd88 100644 (file)
@@ -858,17 +858,21 @@ void show_regs(struct pt_regs * regs)
        printk("MSR: "REG" ", regs->msr);
        printbits(regs->msr, msr_bits);
        printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
-#ifdef CONFIG_PPC64
-       printk("SOFTE: %ld\n", regs->softe);
-#endif
        trap = TRAP(regs);
        if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
-               printk("CFAR: "REG"\n", regs->orig_gpr3);
-       if (trap == 0x300 || trap == 0x600)
+               printk("CFAR: "REG" ", regs->orig_gpr3);
+       if (trap == 0x200 || trap == 0x300 || trap == 0x600)
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-               printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr);
+               printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
 #else
-               printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr);
+               printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+#endif
+#ifdef CONFIG_PPC64
+       printk("SOFTE: %ld ", regs->softe);
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       if (MSR_TM_ACTIVE(regs->msr))
+               printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
 #endif
 
        for (i = 0;  i < 32;  i++) {
@@ -886,9 +890,6 @@ void show_regs(struct pt_regs * regs)
         */
        printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
        printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
-#endif
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-       printk("PACATMSCRATCH [%llx]\n", get_paca()->tm_scratch);
 #endif
        show_stack(current, (unsigned long *) regs->gpr[1]);
        if (!user_mode(regs))
@@ -1086,25 +1087,45 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
        regs->msr = MSR_USER;
 #else
        if (!is_32bit_task()) {
-               unsigned long entry, toc;
+               unsigned long entry;
 
-               /* start is a relocated pointer to the function descriptor for
-                * the elf _start routine.  The first entry in the function
-                * descriptor is the entry address of _start and the second
-                * entry is the TOC value we need to use.
-                */
-               __get_user(entry, (unsigned long __user *)start);
-               __get_user(toc, (unsigned long __user *)start+1);
+               if (is_elf2_task()) {
+                       /* Look ma, no function descriptors! */
+                       entry = start;
 
-               /* Check whether the e_entry function descriptor entries
-                * need to be relocated before we can use them.
-                */
-               if (load_addr != 0) {
-                       entry += load_addr;
-                       toc   += load_addr;
+                       /*
+                        * Ulrich says:
+                        *   The latest iteration of the ABI requires that when
+                        *   calling a function (at its global entry point),
+                        *   the caller must ensure r12 holds the entry point
+                        *   address (so that the function can quickly
+                        *   establish addressability).
+                        */
+                       regs->gpr[12] = start;
+                       /* Make sure that's restored on entry to userspace. */
+                       set_thread_flag(TIF_RESTOREALL);
+               } else {
+                       unsigned long toc;
+
+                       /* start is a relocated pointer to the function
+                        * descriptor for the elf _start routine.  The first
+                        * entry in the function descriptor is the entry
+                        * address of _start and the second entry is the TOC
+                        * value we need to use.
+                        */
+                       __get_user(entry, (unsigned long __user *)start);
+                       __get_user(toc, (unsigned long __user *)start+1);
+
+                       /* Check whether the e_entry function descriptor entries
+                        * need to be relocated before we can use them.
+                        */
+                       if (load_addr != 0) {
+                               entry += load_addr;
+                               toc   += load_addr;
+                       }
+                       regs->gpr[2] = toc;
                }
                regs->nip = entry;
-               regs->gpr[2] = toc;
                regs->msr = MSR_USER64;
        } else {
                regs->nip = start;
index f3a47098fb8e90b31e98a7d2808f6e691064e363..fa0ad8aafbccf3950506a96a64f75bc2141f1bd7 100644 (file)
@@ -777,6 +777,26 @@ int of_get_ibm_chip_id(struct device_node *np)
        return -1;
 }
 
+/**
+ * cpu_to_chip_id - Return the chip-id of a cpu
+ * @cpu: The logical cpu number.
+ *
+ * Return the value of the ibm,chip-id property corresponding to the given
+ * logical cpu number. If the chip-id can not be found, returns -1.
+ */
+int cpu_to_chip_id(int cpu)
+{
+       struct device_node *np = of_get_cpu_node(cpu, NULL);
+       int chip_id;
+
+       if (!np)
+               return -1;
+       chip_id = of_get_ibm_chip_id(np);
+       of_node_put(np);        /* np must not be used past this point */
+       return chip_id;
+}
+EXPORT_SYMBOL(cpu_to_chip_id);
+
 #ifdef CONFIG_PPC_PSERIES
 /*
  * Fix up the uninitialized fields in a new device node:
index 749778e0a69d97dfded0e719066fb1fb4eebac71..1844298f5ea49ea913111c761fb324398c5ed530 100644 (file)
@@ -457,7 +457,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
                if (copy_vsx_to_user(&frame->mc_vsregs, current))
                        return 1;
                msr |= MSR_VSX;
-       }
+       } else if (!ctx_has_vsx_region)
+               /*
+                * With a small context structure we can't hold the VSX
+                * registers, hence clear the MSR value to indicate the state
+                * was not saved.
+                */
+               msr &= ~MSR_VSX;
+
+
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
        /* save spe registers */
index b3c615764c9b97bcb510d017bd9c8ff33e6d69ec..e66f67b8b9e67c1ca4bddbfaa6fb9e32e86c24a9 100644 (file)
@@ -701,12 +701,6 @@ badframe:
 int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
                sigset_t *set, struct pt_regs *regs)
 {
-       /* Handler is *really* a pointer to the function descriptor for
-        * the signal routine.  The first entry in the function
-        * descriptor is the entry address of signal and the second
-        * entry is the TOC value we need to use.
-        */
-       func_descr_t __user *funct_desc_ptr;
        struct rt_sigframe __user *frame;
        unsigned long newsp = 0;
        long err = 0;
@@ -766,19 +760,32 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
                        goto badframe;
                regs->link = (unsigned long) &frame->tramp[0];
        }
-       funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;
 
        /* Allocate a dummy caller frame for the signal handler. */
        newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
        err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
 
        /* Set up "regs" so we "return" to the signal handler. */
-       err |= get_user(regs->nip, &funct_desc_ptr->entry);
+       if (is_elf2_task()) {
+               regs->nip = (unsigned long) ka->sa.sa_handler;
+               regs->gpr[12] = regs->nip;
+       } else {
+               /* Handler is *really* a pointer to the function descriptor for
+                * the signal routine.  The first entry in the function
+                * descriptor is the entry address of signal and the second
+                * entry is the TOC value we need to use.
+                */
+               func_descr_t __user *funct_desc_ptr =
+                       (func_descr_t __user *) ka->sa.sa_handler;
+
+               err |= get_user(regs->nip, &funct_desc_ptr->entry);
+               err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
+       }
+
        /* enter the signal handler in native-endian mode */
        regs->msr &= ~MSR_LE;
        regs->msr |= (MSR_KERNEL & MSR_LE);
        regs->gpr[1] = newsp;
-       err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
        regs->gpr[3] = signr;
        regs->result = 0;
        if (ka->sa.sa_flags & SA_SIGINFO) {
index 930cd8af35035441031e1abecbdde3472ab48808..a3b64f3bf9a298b057cfdc57b008ea01eebecf63 100644 (file)
@@ -597,22 +597,6 @@ out:
        return id;
 }
 
-/* Return the value of the chip-id property corresponding
- * to the given logical cpu.
- */
-int cpu_to_chip_id(int cpu)
-{
-       struct device_node *np;
-
-       np = of_get_cpu_node(cpu, NULL);
-       if (!np)
-               return -1;
-
-       of_node_put(np);
-       return of_get_ibm_chip_id(np);
-}
-EXPORT_SYMBOL(cpu_to_chip_id);
-
 /* Helper routines for cpu to core mapping */
 int cpu_core_index_of_thread(int cpu)
 {
index 192b051df97e27e6a8a74b344151489af6f544d6..b3b144121cc99d64df0c409b0773259770ddf0af 100644 (file)
@@ -213,8 +213,6 @@ static u64 scan_dispatch_log(u64 stop_tb)
        if (i == be64_to_cpu(vpa->dtl_idx))
                return 0;
        while (i < be64_to_cpu(vpa->dtl_idx)) {
-               if (dtl_consumer)
-                       dtl_consumer(dtl, i);
                dtb = be64_to_cpu(dtl->timebase);
                tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
                        be32_to_cpu(dtl->ready_to_enqueue_time);
@@ -227,6 +225,8 @@ static u64 scan_dispatch_log(u64 stop_tb)
                }
                if (dtb > stop_tb)
                        break;
+               if (dtl_consumer)
+                       dtl_consumer(dtl, i);
                stolen += tb_delta;
                ++i;
                ++dtl;
index 45ea281e9a21d479a36e47fbd94a3a0c65075bee..542c6f422e4d4a6288ef1cf18ac877c330ca4339 100644 (file)
@@ -142,6 +142,13 @@ V_FUNCTION_END(__kernel_sigtramp_rt64)
 /* Size of CR reg in DWARF unwind info. */
 #define CRSIZE 4
 
+/* Offset of CR reg within a full word. */
+#ifdef __LITTLE_ENDIAN__
+#define CROFF 0
+#else
+#define CROFF (RSIZE - CRSIZE)
+#endif
+
 /* This is the offset of the VMX reg pointer.  */
 #define VREGS  48*RSIZE+33*8
 
@@ -181,7 +188,14 @@ V_FUNCTION_END(__kernel_sigtramp_rt64)
   rsave (31, 31*RSIZE);                                                        \
   rsave (67, 32*RSIZE);                /* ap, used as temp for nip */          \
   rsave (65, 36*RSIZE);                /* lr */                                \
-  rsave (70, 38*RSIZE + (RSIZE - CRSIZE)) /* cr */
+  rsave (68, 38*RSIZE + CROFF);        /* cr fields */                         \
+  rsave (69, 38*RSIZE + CROFF);                                                \
+  rsave (70, 38*RSIZE + CROFF);                                                \
+  rsave (71, 38*RSIZE + CROFF);                                                \
+  rsave (72, 38*RSIZE + CROFF);                                                \
+  rsave (73, 38*RSIZE + CROFF);                                                \
+  rsave (74, 38*RSIZE + CROFF);                                                \
+  rsave (75, 38*RSIZE + CROFF)
 
 /* Describe where the FP regs are saved.  */
 #define EH_FRAME_FP \
index e7d0c88f621aa08425cb502b9e68b4cbf549ccd3..76a64821f4a23653b64cc6ba0e32daec509ee5b5 100644 (file)
@@ -1419,7 +1419,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
 
                /* needed to ensure proper operation of coherent allocations
                 * later, in case driver doesn't set it explicitly */
-               dma_set_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64));
+               dma_coerce_mask_and_coherent(&viodev->dev, DMA_BIT_MASK(64));
        }
 
        /* register with generic device framework */
index 6936547018b89e21bbe63b2371c9607917fd66dd..c5f734e20b0fc3a885a81a82f41a0ac5cd93f94a 100644 (file)
@@ -123,6 +123,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
        struct mm_struct *mm = current->mm;
        unsigned long addr, len, end;
        unsigned long next;
+       unsigned long flags;
        pgd_t *pgdp;
        int nr = 0;
 
@@ -156,7 +157,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
         * So long as we atomically load page table pointers versus teardown,
         * we can follow the address down to the the page and take a ref on it.
         */
-       local_irq_disable();
+       local_irq_save(flags);
 
        pgdp = pgd_offset(mm, addr);
        do {
@@ -179,7 +180,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        break;
        } while (pgdp++, addr = next, addr != end);
 
-       local_irq_enable();
+       local_irq_restore(flags);
 
        return nr;
 }
index 3e99c149271aa0d4f454c95a9d23eaffa7debec6..7ce9cf3b698835c0dd2b2644d137ff7549c68e72 100644 (file)
@@ -258,7 +258,7 @@ static bool slice_scan_available(unsigned long addr,
                slice = GET_HIGH_SLICE_INDEX(addr);
                *boundary_addr = (slice + end) ?
                        ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
-               return !!(available.high_slices & (1u << slice));
+               return !!(available.high_slices & (1ul << slice));
        }
 }
 
index c2a566fb8bb89cc816b8841fe47fcdf35b222836..132f8726a257c4ed7608534a8f61496d89b69d80 100644 (file)
@@ -403,3 +403,14 @@ config PPC_DOORBELL
        default n
 
 endmenu
+
+config CPU_LITTLE_ENDIAN
+       bool "Build little endian kernel"
+       default n
+       help
+         This option selects whether a big endian or little endian kernel will
+         be built.
+
+         Note that if cross compiling a little endian kernel,
+         CROSS_COMPILE must point to a toolchain capable of targeting
+         little endian powerpc.
index 8844628915dc4770b8d9ce35f4a03ab9dee5e94c..1cb160dc1609a5be5dbfd17ff0bfcbeaf362829b 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
+#include <asm/smp.h>
 
 
 struct powernv_rng {
index 7fbc25b1813fe59ea80eceba65cb2a76027c0eec..ccb633e077b16d6c7a57bb6cfd4372ca87ceeb6a 100644 (file)
@@ -189,8 +189,9 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
        struct eeh_dev *edev;
        struct eeh_pe pe;
        struct pci_dn *pdn = PCI_DN(dn);
-       const u32 *class_code, *vendor_id, *device_id;
-       const u32 *regs;
+       const __be32 *classp, *vendorp, *devicep;
+       u32 class_code;
+       const __be32 *regs;
        u32 pcie_flags;
        int enable = 0;
        int ret;
@@ -201,22 +202,24 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
                return NULL;
 
        /* Retrieve class/vendor/device IDs */
-       class_code = of_get_property(dn, "class-code", NULL);
-       vendor_id  = of_get_property(dn, "vendor-id", NULL);
-       device_id  = of_get_property(dn, "device-id", NULL);
+       classp = of_get_property(dn, "class-code", NULL);
+       vendorp = of_get_property(dn, "vendor-id", NULL);
+       devicep = of_get_property(dn, "device-id", NULL);
 
        /* Skip for bad OF node or PCI-ISA bridge */
-       if (!class_code || !vendor_id || !device_id)
+       if (!classp || !vendorp || !devicep)
                return NULL;
        if (dn->type && !strcmp(dn->type, "isa"))
                return NULL;
 
+       class_code = of_read_number(classp, 1);
+
        /*
         * Update class code and mode of eeh device. We need
         * correctly reflects that current device is root port
         * or PCIe switch downstream port.
         */
-       edev->class_code = *class_code;
+       edev->class_code = class_code;
        edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
        edev->mode &= 0xFFFFFF00;
        if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
@@ -243,12 +246,12 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
        /* Initialize the fake PE */
        memset(&pe, 0, sizeof(struct eeh_pe));
        pe.phb = edev->phb;
-       pe.config_addr = regs[0];
+       pe.config_addr = of_read_number(regs, 1);
 
        /* Enable EEH on the device */
        ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
        if (!ret) {
-               edev->config_addr = regs[0];
+               edev->config_addr = of_read_number(regs, 1);
                /* Retrieve PE address */
                edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
                pe.addr = edev->pe_config_addr;
index 356bc75ca74f6f0bf8f08dd6eca37b5c538dbfc3..4fca3def9db951896864d0b716d7dea8a2364ba2 100644 (file)
@@ -245,6 +245,23 @@ static void pSeries_lpar_hptab_clear(void)
                                        &(ptes[j].pteh), &(ptes[j].ptel));
                }
        }
+
+#ifdef __LITTLE_ENDIAN__
+       /* Reset exceptions to big endian */
+       if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+               long rc;
+
+               rc = pseries_big_endian_exceptions();
+               /*
+                * At this point it is unlikely panic() will get anything
+                * out to the user, but at least this will stop us from
+                * continuing on further and creating an even more
+                * difficult to debug situation.
+                */
+               if (rc)
+                       panic("Could not enable big endian exceptions");
+       }
+#endif
 }
 
 /*
index a702f1c0824292286bc008300886955844ffc380..72a102758d4e5f54e8540b5e78e6417f1a23da3d 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/of.h>
 #include <asm/archrandom.h>
 #include <asm/machdep.h>
+#include <asm/plpar_wrappers.h>
 
 
 static int pseries_get_random_long(unsigned long *v)
index 1f97e2b87a62b85d30bf848a8dbab31938304f21..c1f1908587011d6d131dd56bdf2e1cb283c41128 100644 (file)
@@ -442,6 +442,32 @@ static void pSeries_machine_kexec(struct kimage *image)
 }
 #endif
 
+#ifdef __LITTLE_ENDIAN__
+long pseries_big_endian_exceptions(void)
+{
+       long rc;
+
+       while (1) {     /* retry for as long as the call reports long-busy */
+               rc = enable_big_endian_exceptions();
+               if (!H_IS_LONG_BUSY(rc))
+                       return rc;      /* any other result goes to the caller */
+               mdelay(get_longbusy_msecs(rc)); /* wait as derived from rc */
+       }
+}
+
+static long pseries_little_endian_exceptions(void)
+{
+       long rc;
+
+       while (1) {     /* retry for as long as the call reports long-busy */
+               rc = enable_little_endian_exceptions();
+               if (!H_IS_LONG_BUSY(rc))
+                       return rc;      /* any other result goes to the caller */
+               mdelay(get_longbusy_msecs(rc)); /* wait as derived from rc */
+       }
+}
+#endif /* __LITTLE_ENDIAN__ */
+
 static void __init pSeries_setup_arch(void)
 {
        panic_timeout = 10;
@@ -698,6 +724,22 @@ static int __init pSeries_probe(void)
        /* Now try to figure out if we are running on LPAR */
        of_scan_flat_dt(pseries_probe_fw_features, NULL);
 
+#ifdef __LITTLE_ENDIAN__
+       if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+               long rc;
+               /*
+                * Tell the hypervisor that we want our exceptions to
+                * be taken in little endian mode. If this fails we don't
+                * want to use BUG() because it will trigger an exception.
+                */
+               rc = pseries_little_endian_exceptions();
+               if (rc) {
+                       ppc_md.progress("H_SET_MODE LE exception fail", 0);
+                       panic("Could not enable little endian exceptions");
+               }
+       }
+#endif
+
        if (firmware_has_feature(FW_FEATURE_LPAR))
                hpte_init_lpar();
        else
index 8ef53bc2e70e2e442962820382f4664b62aa7a34..aaa46b353715e94fd7eabb71f6227f3971524ba3 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/of.h>
 #include <linux/smp.h>
 #include <linux/time.h>
+#include <linux/of_fdt.h>
 
 #include <asm/machdep.h>
 #include <asm/udbg.h>
index d18e6cc19df376eab16f7dcbcb09516f874ac289..a3c87f3957502bfe8713af0bf796b6b4ddaa417c 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/of.h>
 #include <linux/io.h>
+#include <linux/of_address.h>
 
 #include "wsp.h"
 
index 2d3b1dd9571da71aec4b11ab97ef5a0cc106be4f..9cd92e645028ecc8041ab856a5cf35621a2929c7 100644 (file)
@@ -18,6 +18,8 @@
 #include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
index cb565bf93650ef7e49f1f57ae162c996b062b109..3f672980793817c72c8bad6f203ec65ed0472c4d 100644 (file)
@@ -15,6 +15,8 @@
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/time.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 
 #include <asm/reg_a2.h>
 #include <asm/irq.h>
index 508ec8282b96f7e2d67641619d48d43f3917f808..a87b414c766af3f986b3575a37183e8ddb632255 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/of.h>
 #include <linux/smp.h>
 #include <linux/time.h>
+#include <linux/of_fdt.h>
 
 #include <asm/machdep.h>
 #include <asm/udbg.h>
index 8928507affead7aa3f9f128232af8d4822188331..6538b4de34fccdab9fbda2cbe5a93ebf161c1277 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/of.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <linux/of_address.h>
 
 #include <asm/cputhreads.h>
 #include <asm/reg_a2.h>
index ddb6efe889144dd78e15e80150e7b8a75bbb2d89..58cd1f00e1efa8a02754c4855ac21b24a316a37e 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/smp.h>
 #include <linux/delay.h>
 #include <linux/time.h>
+#include <linux/of_address.h>
 
 #include <asm/scom.h>
 
index 7d7443283a9d5b2dc19796a7d18e9bebd66e2164..947b5c417e830ae0ead192964ef07f8a68d88851 100644 (file)
@@ -15,7 +15,7 @@ struct pci_sysdata {
        int             domain;         /* PCI domain */
        int             node;           /* NUMA node */
 #ifdef CONFIG_ACPI
-       void            *acpi;          /* ACPI-specific data */
+       struct acpi_device *companion;  /* ACPI companion device */
 #endif
 #ifdef CONFIG_X86_64
        void            *iommu;         /* IOMMU private data */
index b93e09a0fa21c34ee20a5cf3ddd8319efa07f40c..37813b5ddc37472dba6c64b8ff3f2508dc085de0 100644 (file)
 #define MSR_PP1_ENERGY_STATUS          0x00000641
 #define MSR_PP1_POLICY                 0x00000642
 
+#define MSR_CORE_C1_RES                        0x00000660
+
 #define MSR_AMD64_MC0_MASK             0xc0010044
 
 #define MSR_IA32_MCx_CTL(x)            (MSR_IA32_MC0_CTL + 4*(x))
index daff69e21150d054a109a889630f730702088b76..1185fe7a7f47b053ba3c0fcb1b079d1614581e57 100644 (file)
@@ -296,4 +296,4 @@ static struct kernel_param_ops audit_param_ops = {
        .get = param_get_bool,
 };
 
-module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
+arch_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
index a7cccb6d7fec680e184a57c197106a7d475467b1..c96314abd144ca91cfcccaba72352ad0fdc6cf5b 100644 (file)
@@ -61,6 +61,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 #if PAGETABLE_LEVELS > 2
 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
+       struct page *page = virt_to_page(pmd);
        paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
        /*
         * NOTE! For PAE, any changes to the top page-directory-pointer-table
@@ -69,7 +70,8 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 #ifdef CONFIG_X86_PAE
        tlb->need_flush_all = 1;
 #endif
-       tlb_remove_page(tlb, virt_to_page(pmd));
+       pgtable_pmd_page_dtor(page);
+       tlb_remove_page(tlb, page);
 }
 
 #if PAGETABLE_LEVELS > 3
@@ -209,7 +211,7 @@ static int preallocate_pmds(pmd_t *pmds[])
                if (!pmd)
                        failed = true;
                if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
-                       free_page((unsigned long)pmds[i]);
+                       free_page((unsigned long)pmd);
                        pmd = NULL;
                        failed = true;
                }
index 7fb24e53d4c8b88b1fd374a327e0535269f7e328..4f25ec0775526f45133d9087bc13fd4f76dfb24e 100644 (file)
@@ -518,7 +518,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
        sd = &info->sd;
        sd->domain = domain;
        sd->node = node;
-       sd->acpi = device->handle;
+       sd->companion = device;
        /*
         * Maybe the desired pci bus has been already scanned. In such case
         * it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -589,7 +589,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
        struct pci_sysdata *sd = bridge->bus->sysdata;
 
-       ACPI_HANDLE_SET(&bridge->dev, sd->acpi);
+       ACPI_COMPANION_SET(&bridge->dev, sd->companion);
        return 0;
 }
 
index 862f458d4760340935017f61f27c59feab476119..cdc629cf075b74f27f7801565ec3d90f3e299ce0 100644 (file)
@@ -171,9 +171,12 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
-static void blk_mq_rq_ctx_init(struct blk_mq_ctx *ctx, struct request *rq,
-                              unsigned int rw_flags)
+static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
+                              struct request *rq, unsigned int rw_flags)
 {
+       if (blk_queue_io_stat(q))
+               rw_flags |= REQ_IO_STAT;
+
        rq->mq_ctx = ctx;
        rq->cmd_flags = rw_flags;
        ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
@@ -197,7 +200,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 
                rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
                if (rq) {
-                       blk_mq_rq_ctx_init(ctx, rq, rw);
+                       blk_mq_rq_ctx_init(q, ctx, rq, rw);
                        break;
                } else if (!(gfp & __GFP_WAIT))
                        break;
@@ -718,6 +721,8 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
 
+       trace_block_rq_insert(hctx->queue, rq);
+
        list_add_tail(&rq->queuelist, &ctx->rq_list);
        blk_mq_hctx_mark_pending(hctx, ctx);
 
@@ -921,7 +926,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
        trace_block_getrq(q, bio, rw);
        rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
        if (likely(rq))
-               blk_mq_rq_ctx_init(ctx, rq, rw);
+               blk_mq_rq_ctx_init(q, ctx, rq, rw);
        else {
                blk_mq_put_ctx(ctx);
                trace_block_sleeprq(q, bio, rw);
@@ -1377,6 +1382,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
        q->queue_hw_ctx = hctxs;
 
        q->mq_ops = reg->ops;
+       q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
        blk_queue_make_request(q, blk_mq_make_request);
        blk_queue_rq_timed_out(q, reg->ops->timeout);
index a8287b49d0621d1778295ad0516c8ccbf22ed0fa..dc51f467a560558ab4812d339fe2bd24f83a00b2 100644 (file)
@@ -96,6 +96,7 @@
  * - Code works, detects all the partitions.
  *
  ************************************************************/
+#include <linux/kernel.h>
 #include <linux/crc32.h>
 #include <linux/ctype.h>
 #include <linux/math64.h>
@@ -715,8 +716,8 @@ int efi_partition(struct parsed_partitions *state)
                efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid);
 
                /* Naively convert UTF16-LE to 7 bits. */
-               label_max = min(sizeof(info->volname) - 1,
-                               sizeof(ptes[i].partition_name));
+               label_max = min(ARRAY_SIZE(info->volname) - 1,
+                               ARRAY_SIZE(ptes[i].partition_name));
                info->volname[label_max] = 0;
                while (label_count < label_max) {
                        u8 c = ptes[i].partition_name[label_count] & 0xff;
index 71f337aefa3905feaca892b7ac3b49b4bcb411e3..4ae5734fb4733bb8e264565867fa6d73d7b11f6b 100644 (file)
@@ -1402,6 +1402,9 @@ config CRYPTO_USER_API_SKCIPHER
          This option enables the user-spaces interface for symmetric
          key cipher algorithms.
 
+config CRYPTO_HASH_INFO
+       bool
+
 source "drivers/crypto/Kconfig"
 source crypto/asymmetric_keys/Kconfig
 
index 80019ba8da3a2113ce8a48bf924bba9ca7d96e50..b3a7e807e08bca306619a3e7250afbc9160fecbe 100644 (file)
@@ -104,3 +104,4 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
 obj-$(CONFIG_XOR_BLOCKS) += xor.o
 obj-$(CONFIG_ASYNC_CORE) += async_tx/
 obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
+obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
index 6d2c2ea12559c57624b687a06e0b6f3f3ae4291a..03a6eb95ab500bd37fe2b2e4000a73b1c01a99b0 100644 (file)
@@ -12,6 +12,8 @@ if ASYMMETRIC_KEY_TYPE
 config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
        tristate "Asymmetric public-key crypto algorithm subtype"
        select MPILIB
+       select PUBLIC_KEY_ALGO_RSA
+       select CRYPTO_HASH_INFO
        help
          This option provides support for asymmetric public key type handling.
          If signature generation and/or verification are to be used,
@@ -20,8 +22,8 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
 
 config PUBLIC_KEY_ALGO_RSA
        tristate "RSA public-key algorithm"
-       depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
        select MPILIB_EXTRA
+       select MPILIB
        help
          This option enables support for the RSA algorithm (PKCS#1, RFC3447).
 
index cf807654d221c80fced3280ae23bc666a2089348..b77eb53047882ad26de8de4892e8568a4b792326 100644 (file)
@@ -209,6 +209,7 @@ struct key_type key_type_asymmetric = {
        .match          = asymmetric_key_match,
        .destroy        = asymmetric_key_destroy,
        .describe       = asymmetric_key_describe,
+       .def_lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE,
 };
 EXPORT_SYMBOL_GPL(key_type_asymmetric);
 
index cb2e29180a87286321f593649365c2cb2fc95247..97eb001960b97774e89643f65711d2a68e6a3a8f 100644 (file)
 
 MODULE_LICENSE("GPL");
 
-const char *const pkey_algo[PKEY_ALGO__LAST] = {
+const char *const pkey_algo_name[PKEY_ALGO__LAST] = {
        [PKEY_ALGO_DSA]         = "DSA",
        [PKEY_ALGO_RSA]         = "RSA",
 };
-EXPORT_SYMBOL_GPL(pkey_algo);
+EXPORT_SYMBOL_GPL(pkey_algo_name);
 
-const char *const pkey_hash_algo[PKEY_HASH__LAST] = {
-       [PKEY_HASH_MD4]         = "md4",
-       [PKEY_HASH_MD5]         = "md5",
-       [PKEY_HASH_SHA1]        = "sha1",
-       [PKEY_HASH_RIPE_MD_160] = "rmd160",
-       [PKEY_HASH_SHA256]      = "sha256",
-       [PKEY_HASH_SHA384]      = "sha384",
-       [PKEY_HASH_SHA512]      = "sha512",
-       [PKEY_HASH_SHA224]      = "sha224",
+const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST] = {
+#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \
+       defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE)
+       [PKEY_ALGO_RSA]         = &RSA_public_key_algorithm,
+#endif
 };
-EXPORT_SYMBOL_GPL(pkey_hash_algo);
+EXPORT_SYMBOL_GPL(pkey_algo);
 
-const char *const pkey_id_type[PKEY_ID_TYPE__LAST] = {
+const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST] = {
        [PKEY_ID_PGP]           = "PGP",
        [PKEY_ID_X509]          = "X509",
 };
-EXPORT_SYMBOL_GPL(pkey_id_type);
+EXPORT_SYMBOL_GPL(pkey_id_type_name);
 
 /*
  * Provide a part of a description of the key for /proc/keys.
@@ -56,7 +52,7 @@ static void public_key_describe(const struct key *asymmetric_key,
 
        if (key)
                seq_printf(m, "%s.%s",
-                          pkey_id_type[key->id_type], key->algo->name);
+                          pkey_id_type_name[key->id_type], key->algo->name);
 }
 
 /*
@@ -78,21 +74,45 @@ EXPORT_SYMBOL_GPL(public_key_destroy);
 /*
  * Verify a signature using a public key.
  */
-static int public_key_verify_signature(const struct key *key,
-                                      const struct public_key_signature *sig)
+int public_key_verify_signature(const struct public_key *pk,
+                               const struct public_key_signature *sig)
 {
-       const struct public_key *pk = key->payload.data;
+       const struct public_key_algorithm *algo;
+
+       BUG_ON(!pk);
+       BUG_ON(!pk->mpi[0]);
+       BUG_ON(!pk->mpi[1]);
+       BUG_ON(!sig);
+       BUG_ON(!sig->digest);
+       BUG_ON(!sig->mpi[0]);
+
+       algo = pk->algo;
+       if (!algo) {
+               if (pk->pkey_algo >= PKEY_ALGO__LAST)
+                       return -ENOPKG;
+               algo = pkey_algo[pk->pkey_algo];
+               if (!algo)
+                       return -ENOPKG;
+       }
 
-       if (!pk->algo->verify_signature)
+       if (!algo->verify_signature)
                return -ENOTSUPP;
 
-       if (sig->nr_mpi != pk->algo->n_sig_mpi) {
+       if (sig->nr_mpi != algo->n_sig_mpi) {
                pr_debug("Signature has %u MPI not %u\n",
-                        sig->nr_mpi, pk->algo->n_sig_mpi);
+                        sig->nr_mpi, algo->n_sig_mpi);
                return -EINVAL;
        }
 
-       return pk->algo->verify_signature(pk, sig);
+       return algo->verify_signature(pk, sig);
+}
+EXPORT_SYMBOL_GPL(public_key_verify_signature);
+
+static int public_key_verify_signature_2(const struct key *key,
+                                        const struct public_key_signature *sig)
+{
+       const struct public_key *pk = key->payload.data;
+       return public_key_verify_signature(pk, sig);
 }
 
 /*
@@ -103,6 +123,6 @@ struct asymmetric_key_subtype public_key_subtype = {
        .name                   = "public_key",
        .describe               = public_key_describe,
        .destroy                = public_key_destroy,
-       .verify_signature       = public_key_verify_signature,
+       .verify_signature       = public_key_verify_signature_2,
 };
 EXPORT_SYMBOL_GPL(public_key_subtype);
index 5e5e35626899e845bb673530854f9aa127336fd6..5c37a22a0637acdc7abf8f07c610c2f72dfd59bb 100644 (file)
@@ -28,3 +28,9 @@ struct public_key_algorithm {
 };
 
 extern const struct public_key_algorithm RSA_public_key_algorithm;
+
+/*
+ * public_key.c
+ */
+extern int public_key_verify_signature(const struct public_key *pk,
+                                      const struct public_key_signature *sig);
index 4a6a0696f8a3b165618fe7c62516a7446e354c24..90a17f59ba2800d197366dbcb0835a9adc598320 100644 (file)
@@ -73,13 +73,13 @@ static const struct {
        size_t size;
 } RSA_ASN1_templates[PKEY_HASH__LAST] = {
 #define _(X) { RSA_digest_info_##X, sizeof(RSA_digest_info_##X) }
-       [PKEY_HASH_MD5]         = _(MD5),
-       [PKEY_HASH_SHA1]        = _(SHA1),
-       [PKEY_HASH_RIPE_MD_160] = _(RIPE_MD_160),
-       [PKEY_HASH_SHA256]      = _(SHA256),
-       [PKEY_HASH_SHA384]      = _(SHA384),
-       [PKEY_HASH_SHA512]      = _(SHA512),
-       [PKEY_HASH_SHA224]      = _(SHA224),
+       [HASH_ALGO_MD5]         = _(MD5),
+       [HASH_ALGO_SHA1]        = _(SHA1),
+       [HASH_ALGO_RIPE_MD_160] = _(RIPE_MD_160),
+       [HASH_ALGO_SHA256]      = _(SHA256),
+       [HASH_ALGO_SHA384]      = _(SHA384),
+       [HASH_ALGO_SHA512]      = _(SHA512),
+       [HASH_ALGO_SHA224]      = _(SHA224),
 #undef _
 };
 
index facbf26bc6bbbc91eb879b0a5cdf40d01e8f2b05..29893162497ca352101b4d2126a0d03590e50ed1 100644 (file)
@@ -47,6 +47,8 @@ void x509_free_certificate(struct x509_certificate *cert)
                kfree(cert->subject);
                kfree(cert->fingerprint);
                kfree(cert->authority);
+               kfree(cert->sig.digest);
+               mpi_free(cert->sig.rsa.s);
                kfree(cert);
        }
 }
@@ -152,33 +154,33 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
                return -ENOPKG; /* Unsupported combination */
 
        case OID_md4WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_MD5;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_MD5;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha1WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA1;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA1;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha256WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA256;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA256;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha384WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA384;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA384;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha512WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA512;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA512;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha224WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA224;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = HASH_ALGO_SHA224;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
        }
 
@@ -203,8 +205,8 @@ int x509_note_signature(void *context, size_t hdrlen,
                return -EINVAL;
        }
 
-       ctx->cert->sig = value;
-       ctx->cert->sig_size = vlen;
+       ctx->cert->raw_sig = value;
+       ctx->cert->raw_sig_size = vlen;
        return 0;
 }
 
@@ -343,8 +345,9 @@ int x509_extract_key_data(void *context, size_t hdrlen,
        if (ctx->last_oid != OID_rsaEncryption)
                return -ENOPKG;
 
-       /* There seems to be an extraneous 0 byte on the front of the data */
-       ctx->cert->pkey_algo = PKEY_ALGO_RSA;
+       ctx->cert->pub->pkey_algo = PKEY_ALGO_RSA;
+
+       /* Discard the BIT STRING metadata */
        ctx->key = value + 1;
        ctx->key_size = vlen - 1;
        return 0;
index f86dc5fcc4ad46accfb003b464c784fb5763218b..87d9cc26f630625d7c57e3309456de2a356a46b5 100644 (file)
@@ -9,6 +9,7 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
+#include <linux/time.h>
 #include <crypto/public_key.h>
 
 struct x509_certificate {
@@ -20,13 +21,11 @@ struct x509_certificate {
        char            *authority;             /* Authority key fingerprint as hex */
        struct tm       valid_from;
        struct tm       valid_to;
-       enum pkey_algo  pkey_algo : 8;          /* Public key algorithm */
-       enum pkey_algo  sig_pkey_algo : 8;      /* Signature public key algorithm */
-       enum pkey_hash_algo sig_hash_algo : 8;  /* Signature hash algorithm */
        const void      *tbs;                   /* Signed data */
-       size_t          tbs_size;               /* Size of signed data */
-       const void      *sig;                   /* Signature data */
-       size_t          sig_size;               /* Size of sigature */
+       unsigned        tbs_size;               /* Size of signed data */
+       unsigned        raw_sig_size;           /* Size of signature */
+       const void      *raw_sig;               /* Signature data */
+       struct public_key_signature sig;        /* Signature parameters */
 };
 
 /*
@@ -34,3 +33,10 @@ struct x509_certificate {
  */
 extern void x509_free_certificate(struct x509_certificate *cert);
 extern struct x509_certificate *x509_cert_parse(const void *data, size_t datalen);
+
+/*
+ * x509_public_key.c
+ */
+extern int x509_get_sig_params(struct x509_certificate *cert);
+extern int x509_check_signature(const struct public_key *pub,
+                               struct x509_certificate *cert);
index 06007f0e880c330903b5536e9d9c194da302738c..f83300b6e8c13033e5e239be8d48e7fb379668de 100644 (file)
 #include <linux/asn1_decoder.h>
 #include <keys/asymmetric-subtype.h>
 #include <keys/asymmetric-parser.h>
+#include <keys/system_keyring.h>
 #include <crypto/hash.h>
 #include "asymmetric_keys.h"
 #include "public_key.h"
 #include "x509_parser.h"
 
-static const
-struct public_key_algorithm *x509_public_key_algorithms[PKEY_ALGO__LAST] = {
-       [PKEY_ALGO_DSA]         = NULL,
-#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \
-       defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE)
-       [PKEY_ALGO_RSA]         = &RSA_public_key_algorithm,
-#endif
-};
+/*
+ * Find a key in the given keyring by issuer and authority.
+ */
+static struct key *x509_request_asymmetric_key(
+       struct key *keyring,
+       const char *signer, size_t signer_len,
+       const char *authority, size_t auth_len)
+{
+       key_ref_t key;
+       char *id;
+
+       /* Construct an identifier. */
+       id = kmalloc(signer_len + 2 + auth_len + 1, GFP_KERNEL);
+       if (!id)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(id, signer, signer_len);
+       id[signer_len + 0] = ':';
+       id[signer_len + 1] = ' ';
+       memcpy(id + signer_len + 2, authority, auth_len);
+       id[signer_len + 2 + auth_len] = 0;
+
+       pr_debug("Look up: \"%s\"\n", id);
+
+       key = keyring_search(make_key_ref(keyring, 1),
+                            &key_type_asymmetric, id);
+       if (IS_ERR(key))
+               pr_debug("Request for module key '%s' err %ld\n",
+                        id, PTR_ERR(key));
+       kfree(id);
+
+       if (IS_ERR(key)) {
+               switch (PTR_ERR(key)) {
+                       /* Hide some search errors */
+               case -EACCES:
+               case -ENOTDIR:
+               case -EAGAIN:
+                       return ERR_PTR(-ENOKEY);
+               default:
+                       return ERR_CAST(key);
+               }
+       }
+
+       pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key)));
+       return key_ref_to_ptr(key);
+}
 
 /*
- * Check the signature on a certificate using the provided public key
+ * Set up the signature parameters in an X.509 certificate.  This involves
+ * digesting the signed data and extracting the signature.
  */
-static int x509_check_signature(const struct public_key *pub,
-                               const struct x509_certificate *cert)
+int x509_get_sig_params(struct x509_certificate *cert)
 {
-       struct public_key_signature *sig;
        struct crypto_shash *tfm;
        struct shash_desc *desc;
        size_t digest_size, desc_size;
+       void *digest;
        int ret;
 
        pr_devel("==>%s()\n", __func__);
-       
+
+       if (cert->sig.rsa.s)
+               return 0;
+
+       cert->sig.rsa.s = mpi_read_raw_data(cert->raw_sig, cert->raw_sig_size);
+       if (!cert->sig.rsa.s)
+               return -ENOMEM;
+       cert->sig.nr_mpi = 1;
+
        /* Allocate the hashing algorithm we're going to need and find out how
         * big the hash operational data will be.
         */
-       tfm = crypto_alloc_shash(pkey_hash_algo[cert->sig_hash_algo], 0, 0);
+       tfm = crypto_alloc_shash(hash_algo_name[cert->sig.pkey_hash_algo], 0, 0);
        if (IS_ERR(tfm))
                return (PTR_ERR(tfm) == -ENOENT) ? -ENOPKG : PTR_ERR(tfm);
 
        desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
        digest_size = crypto_shash_digestsize(tfm);
 
-       /* We allocate the hash operational data storage on the end of our
-        * context data.
+       /* We allocate the hash operational data storage on the end of the
+        * digest storage space.
         */
        ret = -ENOMEM;
-       sig = kzalloc(sizeof(*sig) + desc_size + digest_size, GFP_KERNEL);
-       if (!sig)
-               goto error_no_sig;
+       digest = kzalloc(digest_size + desc_size, GFP_KERNEL);
+       if (!digest)
+               goto error;
 
-       sig->pkey_hash_algo     = cert->sig_hash_algo;
-       sig->digest             = (u8 *)sig + sizeof(*sig) + desc_size;
-       sig->digest_size        = digest_size;
+       cert->sig.digest = digest;
+       cert->sig.digest_size = digest_size;
 
-       desc = (void *)sig + sizeof(*sig);
-       desc->tfm       = tfm;
-       desc->flags     = CRYPTO_TFM_REQ_MAY_SLEEP;
+       desc = digest + digest_size;
+       desc->tfm = tfm;
+       desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
        ret = crypto_shash_init(desc);
        if (ret < 0)
                goto error;
+       might_sleep();
+       ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, digest);
+error:
+       crypto_free_shash(tfm);
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(x509_get_sig_params);
 
-       ret = -ENOMEM;
-       sig->rsa.s = mpi_read_raw_data(cert->sig, cert->sig_size);
-       if (!sig->rsa.s)
-               goto error;
+/*
+ * Check the signature on a certificate using the provided public key
+ */
+int x509_check_signature(const struct public_key *pub,
+                        struct x509_certificate *cert)
+{
+       int ret;
 
-       ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, sig->digest);
-       if (ret < 0)
-               goto error_mpi;
+       pr_devel("==>%s()\n", __func__);
 
-       ret = pub->algo->verify_signature(pub, sig);
+       ret = x509_get_sig_params(cert);
+       if (ret < 0)
+               return ret;
 
+       ret = public_key_verify_signature(pub, &cert->sig);
        pr_debug("Cert Verification: %d\n", ret);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(x509_check_signature);
 
-error_mpi:
-       mpi_free(sig->rsa.s);
-error:
-       kfree(sig);
-error_no_sig:
-       crypto_free_shash(tfm);
+/*
+ * Check the new certificate against the ones in the trust keyring.  If one of
+ * those is the signing key and validates the new certificate, then mark the
+ * new certificate as being trusted.
+ *
+ * Return 0 if the new certificate was successfully validated, 1 if we couldn't
+ * find a matching parent certificate in the trusted list and an error if there
+ * is a matching certificate but the signature check fails.
+ */
+static int x509_validate_trust(struct x509_certificate *cert,
+                              struct key *trust_keyring)
+{
+       const struct public_key *pk;
+       struct key *key;
+       int ret = 1;
 
-       pr_devel("<==%s() = %d\n", __func__, ret);
+       key = x509_request_asymmetric_key(trust_keyring,
+                                         cert->issuer, strlen(cert->issuer),
+                                         cert->authority,
+                                         strlen(cert->authority));
+       if (!IS_ERR(key))  {
+               pk = key->payload.data;
+               ret = x509_check_signature(pk, cert);
+       }
        return ret;
 }
 
@@ -106,7 +183,6 @@ error_no_sig:
 static int x509_key_preparse(struct key_preparsed_payload *prep)
 {
        struct x509_certificate *cert;
-       struct tm now;
        size_t srlen, sulen;
        char *desc = NULL;
        int ret;
@@ -117,7 +193,18 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
 
        pr_devel("Cert Issuer: %s\n", cert->issuer);
        pr_devel("Cert Subject: %s\n", cert->subject);
-       pr_devel("Cert Key Algo: %s\n", pkey_algo[cert->pkey_algo]);
+
+       if (cert->pub->pkey_algo >= PKEY_ALGO__LAST ||
+           cert->sig.pkey_algo >= PKEY_ALGO__LAST ||
+           cert->sig.pkey_hash_algo >= PKEY_HASH__LAST ||
+           !pkey_algo[cert->pub->pkey_algo] ||
+           !pkey_algo[cert->sig.pkey_algo] ||
+           !hash_algo_name[cert->sig.pkey_hash_algo]) {
+               ret = -ENOPKG;
+               goto error_free_cert;
+       }
+
+       pr_devel("Cert Key Algo: %s\n", pkey_algo_name[cert->pub->pkey_algo]);
        pr_devel("Cert Valid From: %04ld-%02d-%02d %02d:%02d:%02d\n",
                 cert->valid_from.tm_year + 1900, cert->valid_from.tm_mon + 1,
                 cert->valid_from.tm_mday, cert->valid_from.tm_hour,
@@ -127,61 +214,29 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
                 cert->valid_to.tm_mday, cert->valid_to.tm_hour,
                 cert->valid_to.tm_min,  cert->valid_to.tm_sec);
        pr_devel("Cert Signature: %s + %s\n",
-                pkey_algo[cert->sig_pkey_algo],
-                pkey_hash_algo[cert->sig_hash_algo]);
+                pkey_algo_name[cert->sig.pkey_algo],
+                hash_algo_name[cert->sig.pkey_hash_algo]);
 
-       if (!cert->fingerprint || !cert->authority) {
-               pr_warn("Cert for '%s' must have SubjKeyId and AuthKeyId extensions\n",
+       if (!cert->fingerprint) {
+               pr_warn("Cert for '%s' must have a SubjKeyId extension\n",
                        cert->subject);
                ret = -EKEYREJECTED;
                goto error_free_cert;
        }
 
-       time_to_tm(CURRENT_TIME.tv_sec, 0, &now);
-       pr_devel("Now: %04ld-%02d-%02d %02d:%02d:%02d\n",
-                now.tm_year + 1900, now.tm_mon + 1, now.tm_mday,
-                now.tm_hour, now.tm_min,  now.tm_sec);
-       if (now.tm_year < cert->valid_from.tm_year ||
-           (now.tm_year == cert->valid_from.tm_year &&
-            (now.tm_mon < cert->valid_from.tm_mon ||
-             (now.tm_mon == cert->valid_from.tm_mon &&
-              (now.tm_mday < cert->valid_from.tm_mday ||
-               (now.tm_mday == cert->valid_from.tm_mday &&
-                (now.tm_hour < cert->valid_from.tm_hour ||
-                 (now.tm_hour == cert->valid_from.tm_hour &&
-                  (now.tm_min < cert->valid_from.tm_min ||
-                   (now.tm_min == cert->valid_from.tm_min &&
-                    (now.tm_sec < cert->valid_from.tm_sec
-                     ))))))))))) {
-               pr_warn("Cert %s is not yet valid\n", cert->fingerprint);
-               ret = -EKEYREJECTED;
-               goto error_free_cert;
-       }
-       if (now.tm_year > cert->valid_to.tm_year ||
-           (now.tm_year == cert->valid_to.tm_year &&
-            (now.tm_mon > cert->valid_to.tm_mon ||
-             (now.tm_mon == cert->valid_to.tm_mon &&
-              (now.tm_mday > cert->valid_to.tm_mday ||
-               (now.tm_mday == cert->valid_to.tm_mday &&
-                (now.tm_hour > cert->valid_to.tm_hour ||
-                 (now.tm_hour == cert->valid_to.tm_hour &&
-                  (now.tm_min > cert->valid_to.tm_min ||
-                   (now.tm_min == cert->valid_to.tm_min &&
-                    (now.tm_sec > cert->valid_to.tm_sec
-                     ))))))))))) {
-               pr_warn("Cert %s has expired\n", cert->fingerprint);
-               ret = -EKEYEXPIRED;
-               goto error_free_cert;
-       }
-
-       cert->pub->algo = x509_public_key_algorithms[cert->pkey_algo];
+       cert->pub->algo = pkey_algo[cert->pub->pkey_algo];
        cert->pub->id_type = PKEY_ID_X509;
 
-       /* Check the signature on the key */
-       if (strcmp(cert->fingerprint, cert->authority) == 0) {
-               ret = x509_check_signature(cert->pub, cert);
+       /* Check the signature on the key if it appears to be self-signed */
+       if (!cert->authority ||
+           strcmp(cert->fingerprint, cert->authority) == 0) {
+               ret = x509_check_signature(cert->pub, cert); /* self-signed */
                if (ret < 0)
                        goto error_free_cert;
+       } else {
+               ret = x509_validate_trust(cert, system_trusted_keyring);
+               if (!ret)
+                       prep->trusted = 1;
        }
 
        /* Propose a description */
@@ -237,3 +292,6 @@ static void __exit x509_key_exit(void)
 
 module_init(x509_key_init);
 module_exit(x509_key_exit);
+
+MODULE_DESCRIPTION("X.509 certificate parser");
+MODULE_LICENSE("GPL");
index 9e62feffb374536995e08357866df9787635b19e..f8c0b8dbeb7582beca1ee7fd5c7aaac58aba23cd 100644 (file)
@@ -50,33 +50,36 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
                                                      &dest, 1, &src, 1, len);
        struct dma_device *device = chan ? chan->device : NULL;
        struct dma_async_tx_descriptor *tx = NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
 
-       if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
-               dma_addr_t dma_dest, dma_src;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
+
+       if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
                unsigned long dma_prep_flags = 0;
 
                if (submit->cb_fn)
                        dma_prep_flags |= DMA_PREP_INTERRUPT;
                if (submit->flags & ASYNC_TX_FENCE)
                        dma_prep_flags |= DMA_PREP_FENCE;
-               dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
-                                       DMA_FROM_DEVICE);
-
-               dma_src = dma_map_page(device->dev, src, src_offset, len,
-                                      DMA_TO_DEVICE);
-
-               tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
-                                                   len, dma_prep_flags);
-               if (!tx) {
-                       dma_unmap_page(device->dev, dma_dest, len,
-                                      DMA_FROM_DEVICE);
-                       dma_unmap_page(device->dev, dma_src, len,
-                                      DMA_TO_DEVICE);
-               }
+
+               unmap->to_cnt = 1;
+               unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
+                                             DMA_TO_DEVICE);
+               unmap->from_cnt = 1;
+               unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
+                                             DMA_FROM_DEVICE);
+               unmap->len = len;
+
+               tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+                                                   unmap->addr[0], len,
+                                                   dma_prep_flags);
        }
 
        if (tx) {
                pr_debug("%s: (async) len: %zu\n", __func__, len);
+
+               dma_set_unmap(tx, unmap);
                async_tx_submit(chan, tx, submit);
        } else {
                void *dest_buf, *src_buf;
@@ -96,6 +99,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
                async_tx_sync_epilog(submit);
        }
 
+       dmaengine_unmap_put(unmap);
+
        return tx;
 }
 EXPORT_SYMBOL_GPL(async_memcpy);
index 91d5d385899ee06c507cee1aa496330405d6276b..d05327caf69dbc18532478b122ea9834e67f1fd9 100644 (file)
@@ -46,49 +46,24 @@ static struct page *pq_scribble_page;
  * do_async_gen_syndrome - asynchronously calculate P and/or Q
  */
 static __async_inline struct dma_async_tx_descriptor *
-do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
-                     const unsigned char *scfs, unsigned int offset, int disks,
-                     size_t len, dma_addr_t *dma_src,
+do_async_gen_syndrome(struct dma_chan *chan,
+                     const unsigned char *scfs, int disks,
+                     struct dmaengine_unmap_data *unmap,
+                     enum dma_ctrl_flags dma_flags,
                      struct async_submit_ctl *submit)
 {
        struct dma_async_tx_descriptor *tx = NULL;
        struct dma_device *dma = chan->device;
-       enum dma_ctrl_flags dma_flags = 0;
        enum async_tx_flags flags_orig = submit->flags;
        dma_async_tx_callback cb_fn_orig = submit->cb_fn;
        dma_async_tx_callback cb_param_orig = submit->cb_param;
        int src_cnt = disks - 2;
-       unsigned char coefs[src_cnt];
        unsigned short pq_src_cnt;
        dma_addr_t dma_dest[2];
        int src_off = 0;
-       int idx;
-       int i;
 
-       /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
-       if (P(blocks, disks))
-               dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
-                                          len, DMA_BIDIRECTIONAL);
-       else
-               dma_flags |= DMA_PREP_PQ_DISABLE_P;
-       if (Q(blocks, disks))
-               dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
-                                          len, DMA_BIDIRECTIONAL);
-       else
-               dma_flags |= DMA_PREP_PQ_DISABLE_Q;
-
-       /* convert source addresses being careful to collapse 'empty'
-        * sources and update the coefficients accordingly
-        */
-       for (i = 0, idx = 0; i < src_cnt; i++) {
-               if (blocks[i] == NULL)
-                       continue;
-               dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
-                                           DMA_TO_DEVICE);
-               coefs[idx] = scfs[i];
-               idx++;
-       }
-       src_cnt = idx;
+       if (submit->flags & ASYNC_TX_FENCE)
+               dma_flags |= DMA_PREP_FENCE;
 
        while (src_cnt > 0) {
                submit->flags = flags_orig;
@@ -100,28 +75,25 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
                if (src_cnt > pq_src_cnt) {
                        submit->flags &= ~ASYNC_TX_ACK;
                        submit->flags |= ASYNC_TX_FENCE;
-                       dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
                        submit->cb_fn = NULL;
                        submit->cb_param = NULL;
                } else {
-                       dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
                        submit->cb_fn = cb_fn_orig;
                        submit->cb_param = cb_param_orig;
                        if (cb_fn_orig)
                                dma_flags |= DMA_PREP_INTERRUPT;
                }
-               if (submit->flags & ASYNC_TX_FENCE)
-                       dma_flags |= DMA_PREP_FENCE;
 
-               /* Since we have clobbered the src_list we are committed
-                * to doing this asynchronously.  Drivers force forward
-                * progress in case they can not provide a descriptor
+               /* Drivers force forward progress in case they can not provide
+                * a descriptor
                 */
                for (;;) {
+                       dma_dest[0] = unmap->addr[disks - 2];
+                       dma_dest[1] = unmap->addr[disks - 1];
                        tx = dma->device_prep_dma_pq(chan, dma_dest,
-                                                    &dma_src[src_off],
+                                                    &unmap->addr[src_off],
                                                     pq_src_cnt,
-                                                    &coefs[src_off], len,
+                                                    &scfs[src_off], unmap->len,
                                                     dma_flags);
                        if (likely(tx))
                                break;
@@ -129,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
                        dma_async_issue_pending(chan);
                }
 
+               dma_set_unmap(tx, unmap);
                async_tx_submit(chan, tx, submit);
                submit->depend_tx = tx;
 
@@ -188,10 +161,6 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
  * set to NULL those buffers will be replaced with the raid6_zero_page
  * in the synchronous path and omitted in the hardware-asynchronous
  * path.
- *
- * 'blocks' note: if submit->scribble is NULL then the contents of
- * 'blocks' may be overwritten to perform address conversions
- * (dma_map_page() or page_address()).
  */
 struct dma_async_tx_descriptor *
 async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
@@ -202,26 +171,69 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
                                                      &P(blocks, disks), 2,
                                                      blocks, src_cnt, len);
        struct dma_device *device = chan ? chan->device : NULL;
-       dma_addr_t *dma_src = NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
 
        BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
 
-       if (submit->scribble)
-               dma_src = submit->scribble;
-       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-               dma_src = (dma_addr_t *) blocks;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
 
-       if (dma_src && device &&
+       if (unmap &&
            (src_cnt <= dma_maxpq(device, 0) ||
             dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
            is_dma_pq_aligned(device, offset, 0, len)) {
+               struct dma_async_tx_descriptor *tx;
+               enum dma_ctrl_flags dma_flags = 0;
+               unsigned char coefs[src_cnt];
+               int i, j;
+
                /* run the p+q asynchronously */
                pr_debug("%s: (async) disks: %d len: %zu\n",
                         __func__, disks, len);
-               return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
-                                            disks, len, dma_src, submit);
+
+               /* convert source addresses being careful to collapse 'empty'
+                * sources and update the coefficients accordingly
+                */
+               unmap->len = len;
+               for (i = 0, j = 0; i < src_cnt; i++) {
+                       if (blocks[i] == NULL)
+                               continue;
+                       unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset,
+                                                     len, DMA_TO_DEVICE);
+                       coefs[j] = raid6_gfexp[i];
+                       unmap->to_cnt++;
+                       j++;
+               }
+
+               /*
+                * DMAs use destinations as sources,
+                * so use BIDIRECTIONAL mapping
+                */
+               unmap->bidi_cnt++;
+               if (P(blocks, disks))
+                       unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
+                                                       offset, len, DMA_BIDIRECTIONAL);
+               else {
+                       unmap->addr[j++] = 0;
+                       dma_flags |= DMA_PREP_PQ_DISABLE_P;
+               }
+
+               unmap->bidi_cnt++;
+               if (Q(blocks, disks))
+                       unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
+                                                      offset, len, DMA_BIDIRECTIONAL);
+               else {
+                       unmap->addr[j++] = 0;
+                       dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+               }
+
+               tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit);
+               dmaengine_unmap_put(unmap);
+               return tx;
        }
 
+       dmaengine_unmap_put(unmap);
+
        /* run the pq synchronously */
        pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
 
@@ -277,50 +289,60 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
        struct dma_async_tx_descriptor *tx;
        unsigned char coefs[disks-2];
        enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
-       dma_addr_t *dma_src = NULL;
-       int src_cnt = 0;
+       struct dmaengine_unmap_data *unmap = NULL;
 
        BUG_ON(disks < 4);
 
-       if (submit->scribble)
-               dma_src = submit->scribble;
-       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-               dma_src = (dma_addr_t *) blocks;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
 
-       if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+       if (unmap && disks <= dma_maxpq(device, 0) &&
            is_dma_pq_aligned(device, offset, 0, len)) {
                struct device *dev = device->dev;
-               dma_addr_t *pq = &dma_src[disks-2];
-               int i;
+               dma_addr_t pq[2];
+               int i, j = 0, src_cnt = 0;
 
                pr_debug("%s: (async) disks: %d len: %zu\n",
                         __func__, disks, len);
-               if (!P(blocks, disks))
+
+               unmap->len = len;
+               for (i = 0; i < disks-2; i++)
+                       if (likely(blocks[i])) {
+                               unmap->addr[j] = dma_map_page(dev, blocks[i],
+                                                             offset, len,
+                                                             DMA_TO_DEVICE);
+                               coefs[j] = raid6_gfexp[i];
+                               unmap->to_cnt++;
+                               src_cnt++;
+                               j++;
+                       }
+
+               if (!P(blocks, disks)) {
+                       pq[0] = 0;
                        dma_flags |= DMA_PREP_PQ_DISABLE_P;
-               else
+               } else {
                        pq[0] = dma_map_page(dev, P(blocks, disks),
                                             offset, len,
                                             DMA_TO_DEVICE);
-               if (!Q(blocks, disks))
+                       unmap->addr[j++] = pq[0];
+                       unmap->to_cnt++;
+               }
+               if (!Q(blocks, disks)) {
+                       pq[1] = 0;
                        dma_flags |= DMA_PREP_PQ_DISABLE_Q;
-               else
+               } else {
                        pq[1] = dma_map_page(dev, Q(blocks, disks),
                                             offset, len,
                                             DMA_TO_DEVICE);
+                       unmap->addr[j++] = pq[1];
+                       unmap->to_cnt++;
+               }
 
                if (submit->flags & ASYNC_TX_FENCE)
                        dma_flags |= DMA_PREP_FENCE;
-               for (i = 0; i < disks-2; i++)
-                       if (likely(blocks[i])) {
-                               dma_src[src_cnt] = dma_map_page(dev, blocks[i],
-                                                               offset, len,
-                                                               DMA_TO_DEVICE);
-                               coefs[src_cnt] = raid6_gfexp[i];
-                               src_cnt++;
-                       }
-
                for (;;) {
-                       tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+                       tx = device->device_prep_dma_pq_val(chan, pq,
+                                                           unmap->addr,
                                                            src_cnt,
                                                            coefs,
                                                            len, pqres,
@@ -330,6 +352,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
                        async_tx_quiesce(&submit->depend_tx);
                        dma_async_issue_pending(chan);
                }
+
+               dma_set_unmap(tx, unmap);
                async_tx_submit(chan, tx, submit);
 
                return tx;
index a9f08a6a582ebccce298f718d0bbf5db8b1e1a7a..934a849814958e6ea37b9dbdb96abc820c4fe9e1 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/raid/pq.h>
 #include <linux/async_tx.h>
+#include <linux/dmaengine.h>
 
 static struct dma_async_tx_descriptor *
 async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
@@ -34,35 +35,45 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
        struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
                                                      &dest, 1, srcs, 2, len);
        struct dma_device *dma = chan ? chan->device : NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
        const u8 *amul, *bmul;
        u8 ax, bx;
        u8 *a, *b, *c;
 
-       if (dma) {
-               dma_addr_t dma_dest[2];
-               dma_addr_t dma_src[2];
+       if (dma)
+               unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+       if (unmap) {
                struct device *dev = dma->dev;
+               dma_addr_t pq[2];
                struct dma_async_tx_descriptor *tx;
                enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
                if (submit->flags & ASYNC_TX_FENCE)
                        dma_flags |= DMA_PREP_FENCE;
-               dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
-               dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
-               dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
-               tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+               unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+               unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+               unmap->to_cnt = 2;
+
+               unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+               unmap->bidi_cnt = 1;
+               /* engine only looks at Q, but expects it to follow P */
+               pq[1] = unmap->addr[2];
+
+               unmap->len = len;
+               tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,
                                             len, dma_flags);
                if (tx) {
+                       dma_set_unmap(tx, unmap);
                        async_tx_submit(chan, tx, submit);
+                       dmaengine_unmap_put(unmap);
                        return tx;
                }
 
                /* could not get a descriptor, unmap and fall through to
                 * the synchronous path
                 */
-               dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
-               dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
-               dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
+               dmaengine_unmap_put(unmap);
        }
 
        /* run the operation synchronously */
@@ -89,23 +100,38 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
        struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
                                                      &dest, 1, &src, 1, len);
        struct dma_device *dma = chan ? chan->device : NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
        const u8 *qmul; /* Q multiplier table */
        u8 *d, *s;
 
-       if (dma) {
+       if (dma)
+               unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+       if (unmap) {
                dma_addr_t dma_dest[2];
-               dma_addr_t dma_src[1];
                struct device *dev = dma->dev;
                struct dma_async_tx_descriptor *tx;
                enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
                if (submit->flags & ASYNC_TX_FENCE)
                        dma_flags |= DMA_PREP_FENCE;
-               dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
-               dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
-               tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
-                                            len, dma_flags);
+               unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+               unmap->to_cnt++;
+               unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+               dma_dest[1] = unmap->addr[1];
+               unmap->bidi_cnt++;
+               unmap->len = len;
+
+               /* this looks funny, but the engine looks for Q at
+                * dma_dest[1] and ignores dma_dest[0] as a dest
+                * due to DMA_PREP_PQ_DISABLE_P
+                */
+               tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr,
+                                            1, &coef, len, dma_flags);
+
                if (tx) {
+                       dma_set_unmap(tx, unmap);
+                       dmaengine_unmap_put(unmap);
                        async_tx_submit(chan, tx, submit);
                        return tx;
                }
@@ -113,8 +139,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
                /* could not get a descriptor, unmap and fall through to
                 * the synchronous path
                 */
-               dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
-               dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+               dmaengine_unmap_put(unmap);
        }
 
        /* no channel available, or failed to allocate a descriptor, so
index 7be34248b450896cfc056d1709513df4f172f79a..39ea4791a3c977ad7eda47498b9f7d7741b438e2 100644 (file)
@@ -128,7 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
                }
                device->device_issue_pending(chan);
        } else {
-               if (dma_wait_for_async_tx(depend_tx) != DMA_SUCCESS)
+               if (dma_wait_for_async_tx(depend_tx) != DMA_COMPLETE)
                        panic("%s: DMA error waiting for depend_tx\n",
                              __func__);
                tx->tx_submit(tx);
@@ -280,7 +280,7 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
                 * we are referring to the correct operation
                 */
                BUG_ON(async_tx_test_ack(*tx));
-               if (dma_wait_for_async_tx(*tx) != DMA_SUCCESS)
+               if (dma_wait_for_async_tx(*tx) != DMA_COMPLETE)
                        panic("%s: DMA error waiting for transaction\n",
                              __func__);
                async_tx_ack(*tx);
index 8ade0a0481c67149e72d7878deb306a0fd33e5c2..3c562f5a60bbb34f19e6b90f4d858b04d3d496e2 100644 (file)
 
 /* do_async_xor - dma map the pages and perform the xor with an engine */
 static __async_inline struct dma_async_tx_descriptor *
-do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
-            unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
             struct async_submit_ctl *submit)
 {
        struct dma_device *dma = chan->device;
        struct dma_async_tx_descriptor *tx = NULL;
-       int src_off = 0;
-       int i;
        dma_async_tx_callback cb_fn_orig = submit->cb_fn;
        void *cb_param_orig = submit->cb_param;
        enum async_tx_flags flags_orig = submit->flags;
-       enum dma_ctrl_flags dma_flags;
-       int xor_src_cnt = 0;
-       dma_addr_t dma_dest;
-
-       /* map the dest bidrectional in case it is re-used as a source */
-       dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL);
-       for (i = 0; i < src_cnt; i++) {
-               /* only map the dest once */
-               if (!src_list[i])
-                       continue;
-               if (unlikely(src_list[i] == dest)) {
-                       dma_src[xor_src_cnt++] = dma_dest;
-                       continue;
-               }
-               dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset,
-                                                     len, DMA_TO_DEVICE);
-       }
-       src_cnt = xor_src_cnt;
+       enum dma_ctrl_flags dma_flags = 0;
+       int src_cnt = unmap->to_cnt;
+       int xor_src_cnt;
+       dma_addr_t dma_dest = unmap->addr[unmap->to_cnt];
+       dma_addr_t *src_list = unmap->addr;
 
        while (src_cnt) {
+               dma_addr_t tmp;
+
                submit->flags = flags_orig;
-               dma_flags = 0;
                xor_src_cnt = min(src_cnt, (int)dma->max_xor);
-               /* if we are submitting additional xors, leave the chain open,
-                * clear the callback parameters, and leave the destination
-                * buffer mapped
+               /* if we are submitting additional xors, leave the chain open
+                * and clear the callback parameters
                 */
                if (src_cnt > xor_src_cnt) {
                        submit->flags &= ~ASYNC_TX_ACK;
                        submit->flags |= ASYNC_TX_FENCE;
-                       dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
                        submit->cb_fn = NULL;
                        submit->cb_param = NULL;
                } else {
@@ -85,12 +68,18 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
                        dma_flags |= DMA_PREP_INTERRUPT;
                if (submit->flags & ASYNC_TX_FENCE)
                        dma_flags |= DMA_PREP_FENCE;
-               /* Since we have clobbered the src_list we are committed
-                * to doing this asynchronously.  Drivers force forward progress
-                * in case they can not provide a descriptor
+
+               /* Drivers force forward progress in case they can not provide a
+                * descriptor
                 */
-               tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
-                                             xor_src_cnt, len, dma_flags);
+               tmp = src_list[0];
+               if (src_list > unmap->addr)
+                       src_list[0] = dma_dest;
+               tx = dma->device_prep_dma_xor(chan, dma_dest, src_list,
+                                             xor_src_cnt, unmap->len,
+                                             dma_flags);
+               src_list[0] = tmp;
+
 
                if (unlikely(!tx))
                        async_tx_quiesce(&submit->depend_tx);
@@ -99,22 +88,21 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
                while (unlikely(!tx)) {
                        dma_async_issue_pending(chan);
                        tx = dma->device_prep_dma_xor(chan, dma_dest,
-                                                     &dma_src[src_off],
-                                                     xor_src_cnt, len,
+                                                     src_list,
+                                                     xor_src_cnt, unmap->len,
                                                      dma_flags);
                }
 
+               dma_set_unmap(tx, unmap);
                async_tx_submit(chan, tx, submit);
                submit->depend_tx = tx;
 
                if (src_cnt > xor_src_cnt) {
                        /* drop completed sources */
                        src_cnt -= xor_src_cnt;
-                       src_off += xor_src_cnt;
-
                        /* use the intermediate result a source */
-                       dma_src[--src_off] = dma_dest;
                        src_cnt++;
+                       src_list += xor_src_cnt - 1;
                } else
                        break;
        }
@@ -189,22 +177,40 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
        struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
                                                      &dest, 1, src_list,
                                                      src_cnt, len);
-       dma_addr_t *dma_src = NULL;
+       struct dma_device *device = chan ? chan->device : NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
 
        BUG_ON(src_cnt <= 1);
 
-       if (submit->scribble)
-               dma_src = submit->scribble;
-       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-               dma_src = (dma_addr_t *) src_list;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
+
+       if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
+               struct dma_async_tx_descriptor *tx;
+               int i, j;
 
-       if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
                /* run the xor asynchronously */
                pr_debug("%s (async): len: %zu\n", __func__, len);
 
-               return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
-                                   dma_src, submit);
+               unmap->len = len;
+               for (i = 0, j = 0; i < src_cnt; i++) {
+                       if (!src_list[i])
+                               continue;
+                       unmap->to_cnt++;
+                       unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
+                                                       offset, len, DMA_TO_DEVICE);
+               }
+
+               /* map it bidirectional as it may be re-used as a source */
+               unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
+                                             DMA_BIDIRECTIONAL);
+               unmap->bidi_cnt = 1;
+
+               tx = do_async_xor(chan, unmap, submit);
+               dmaengine_unmap_put(unmap);
+               return tx;
        } else {
+               dmaengine_unmap_put(unmap);
                /* run the xor synchronously */
                pr_debug("%s (sync): len: %zu\n", __func__, len);
                WARN_ONCE(chan, "%s: no space for dma address conversion\n",
@@ -268,16 +274,14 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
        struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len);
        struct dma_device *device = chan ? chan->device : NULL;
        struct dma_async_tx_descriptor *tx = NULL;
-       dma_addr_t *dma_src = NULL;
+       struct dmaengine_unmap_data *unmap = NULL;
 
        BUG_ON(src_cnt <= 1);
 
-       if (submit->scribble)
-               dma_src = submit->scribble;
-       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-               dma_src = (dma_addr_t *) src_list;
+       if (device)
+               unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
 
-       if (dma_src && device && src_cnt <= device->max_xor &&
+       if (unmap && src_cnt <= device->max_xor &&
            is_dma_xor_aligned(device, offset, 0, len)) {
                unsigned long dma_prep_flags = 0;
                int i;
@@ -288,11 +292,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
                        dma_prep_flags |= DMA_PREP_INTERRUPT;
                if (submit->flags & ASYNC_TX_FENCE)
                        dma_prep_flags |= DMA_PREP_FENCE;
-               for (i = 0; i < src_cnt; i++)
-                       dma_src[i] = dma_map_page(device->dev, src_list[i],
-                                                 offset, len, DMA_TO_DEVICE);
 
-               tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
+               for (i = 0; i < src_cnt; i++) {
+                       unmap->addr[i] = dma_map_page(device->dev, src_list[i],
+                                                     offset, len, DMA_TO_DEVICE);
+                       unmap->to_cnt++;
+               }
+               unmap->len = len;
+
+               tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt,
                                                     len, result,
                                                     dma_prep_flags);
                if (unlikely(!tx)) {
@@ -301,11 +309,11 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
                        while (!tx) {
                                dma_async_issue_pending(chan);
                                tx = device->device_prep_dma_xor_val(chan,
-                                       dma_src, src_cnt, len, result,
+                                       unmap->addr, src_cnt, len, result,
                                        dma_prep_flags);
                        }
                }
-
+               dma_set_unmap(tx, unmap);
                async_tx_submit(chan, tx, submit);
        } else {
                enum async_tx_flags flags_orig = submit->flags;
@@ -327,6 +335,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
                async_tx_sync_epilog(submit);
                submit->flags = flags_orig;
        }
+       dmaengine_unmap_put(unmap);
 
        return tx;
 }
index 4a92bac744dce500a209f5e4e6033d465eff5dc9..dad95f45b88f6566afc62df645151d3a2ae60092 100644 (file)
@@ -28,7 +28,7 @@
 #undef pr
 #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
 
-#define NDISKS 16 /* Including P and Q */
+#define NDISKS 64 /* Including P and Q */
 
 static struct page *dataptrs[NDISKS];
 static addr_conv_t addr_conv[NDISKS];
@@ -219,6 +219,14 @@ static int raid6_test(void)
                err += test(11, &tests);
                err += test(12, &tests);
        }
+
+       /* the 24 disk case is special for ioatdma as it is the boundary point
+        * at which it needs to switch from 8-source ops to 16-source
+        * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
+        */
+       if (NDISKS > 24)
+               err += test(24, &tests);
+
        err += test(NDISKS, &tests);
 
        pr("\n");
diff --git a/crypto/hash_info.c b/crypto/hash_info.c
new file mode 100644 (file)
index 0000000..3e7ff46
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Hash Info: Hash algorithms information
+ *
+ * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/export.h>
+#include <crypto/hash_info.h>
+
+const char *const hash_algo_name[HASH_ALGO__LAST] = {
+       [HASH_ALGO_MD4]         = "md4",
+       [HASH_ALGO_MD5]         = "md5",
+       [HASH_ALGO_SHA1]        = "sha1",
+       [HASH_ALGO_RIPE_MD_160] = "rmd160",
+       [HASH_ALGO_SHA256]      = "sha256",
+       [HASH_ALGO_SHA384]      = "sha384",
+       [HASH_ALGO_SHA512]      = "sha512",
+       [HASH_ALGO_SHA224]      = "sha224",
+       [HASH_ALGO_RIPE_MD_128] = "rmd128",
+       [HASH_ALGO_RIPE_MD_256] = "rmd256",
+       [HASH_ALGO_RIPE_MD_320] = "rmd320",
+       [HASH_ALGO_WP_256]      = "wp256",
+       [HASH_ALGO_WP_384]      = "wp384",
+       [HASH_ALGO_WP_512]      = "wp512",
+       [HASH_ALGO_TGR_128]     = "tgr128",
+       [HASH_ALGO_TGR_160]     = "tgr160",
+       [HASH_ALGO_TGR_192]     = "tgr192",
+};
+EXPORT_SYMBOL_GPL(hash_algo_name);
+
+const int hash_digest_size[HASH_ALGO__LAST] = {
+       [HASH_ALGO_MD4]         = MD5_DIGEST_SIZE,
+       [HASH_ALGO_MD5]         = MD5_DIGEST_SIZE,
+       [HASH_ALGO_SHA1]        = SHA1_DIGEST_SIZE,
+       [HASH_ALGO_RIPE_MD_160] = RMD160_DIGEST_SIZE,
+       [HASH_ALGO_SHA256]      = SHA256_DIGEST_SIZE,
+       [HASH_ALGO_SHA384]      = SHA384_DIGEST_SIZE,
+       [HASH_ALGO_SHA512]      = SHA512_DIGEST_SIZE,
+       [HASH_ALGO_SHA224]      = SHA224_DIGEST_SIZE,
+       [HASH_ALGO_RIPE_MD_128] = RMD128_DIGEST_SIZE,
+       [HASH_ALGO_RIPE_MD_256] = RMD256_DIGEST_SIZE,
+       [HASH_ALGO_RIPE_MD_320] = RMD320_DIGEST_SIZE,
+       [HASH_ALGO_WP_256]      = WP256_DIGEST_SIZE,
+       [HASH_ALGO_WP_384]      = WP384_DIGEST_SIZE,
+       [HASH_ALGO_WP_512]      = WP512_DIGEST_SIZE,
+       [HASH_ALGO_TGR_128]     = TGR128_DIGEST_SIZE,
+       [HASH_ALGO_TGR_160]     = TGR160_DIGEST_SIZE,
+       [HASH_ALGO_TGR_192]     = TGR192_DIGEST_SIZE,
+};
+EXPORT_SYMBOL_GPL(hash_digest_size);
index c95df0b8c8808e505f9dbfc7c8bcc68da48af57d..5d9248526d780b06c51bee967cdd088c9c90e5e5 100644 (file)
@@ -235,17 +235,6 @@ config ACPI_INITRD_TABLE_OVERRIDE
          initrd, therefore it's safe to say Y.
          See Documentation/acpi/initrd_table_override.txt for details
 
-config ACPI_BLACKLIST_YEAR
-       int "Disable ACPI for systems before Jan 1st this year" if X86_32
-       default 0
-       help
-         Enter a 4-digit year, e.g., 2001, to disable ACPI by default
-         on platforms with DMI BIOS date before January 1st that year.
-         "acpi=force" can be used to override this mechanism.
-
-         Enter 0 to disable this mechanism and allow ACPI to
-         run by default no matter what the year.  (default)
-
 config ACPI_DEBUG
        bool "Debug Statements"
        default n
index b9f0d5f4bba51cecb6ffe8fda126762850818263..8711e3797165fa73fd0c401cedbb54c61eb68e4a 100644 (file)
@@ -56,7 +56,6 @@ static int ac_sleep_before_get_state_ms;
 
 struct acpi_ac {
        struct power_supply charger;
-       struct acpi_device *adev;
        struct platform_device *pdev;
        unsigned long long state;
 };
@@ -70,8 +69,9 @@ struct acpi_ac {
 static int acpi_ac_get_state(struct acpi_ac *ac)
 {
        acpi_status status;
+       acpi_handle handle = ACPI_HANDLE(&ac->pdev->dev);
 
-       status = acpi_evaluate_integer(ac->adev->handle, "_PSR", NULL,
+       status = acpi_evaluate_integer(handle, "_PSR", NULL,
                                       &ac->state);
        if (ACPI_FAILURE(status)) {
                ACPI_EXCEPTION((AE_INFO, status,
@@ -119,6 +119,7 @@ static enum power_supply_property ac_props[] = {
 static void acpi_ac_notify_handler(acpi_handle handle, u32 event, void *data)
 {
        struct acpi_ac *ac = data;
+       struct acpi_device *adev;
 
        if (!ac)
                return;
@@ -141,10 +142,11 @@ static void acpi_ac_notify_handler(acpi_handle handle, u32 event, void *data)
                        msleep(ac_sleep_before_get_state_ms);
 
                acpi_ac_get_state(ac);
-               acpi_bus_generate_netlink_event(ac->adev->pnp.device_class,
+               adev = ACPI_COMPANION(&ac->pdev->dev);
+               acpi_bus_generate_netlink_event(adev->pnp.device_class,
                                                dev_name(&ac->pdev->dev),
                                                event, (u32) ac->state);
-               acpi_notifier_call_chain(ac->adev, event, (u32) ac->state);
+               acpi_notifier_call_chain(adev, event, (u32) ac->state);
                kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE);
        }
 
@@ -178,8 +180,8 @@ static int acpi_ac_probe(struct platform_device *pdev)
        if (!pdev)
                return -EINVAL;
 
-       result = acpi_bus_get_device(ACPI_HANDLE(&pdev->dev), &adev);
-       if (result)
+       adev = ACPI_COMPANION(&pdev->dev);
+       if (!adev)
                return -ENODEV;
 
        ac = kzalloc(sizeof(struct acpi_ac), GFP_KERNEL);
@@ -188,7 +190,6 @@ static int acpi_ac_probe(struct platform_device *pdev)
 
        strcpy(acpi_device_name(adev), ACPI_AC_DEVICE_NAME);
        strcpy(acpi_device_class(adev), ACPI_AC_CLASS);
-       ac->adev = adev;
        ac->pdev = pdev;
        platform_set_drvdata(pdev, ac);
 
index d3961014aad7ff9d77bcb296ec2f851f96d64b5b..6745fe137b9ea541ae729429035eaeca8fc8fc07 100644 (file)
@@ -163,6 +163,15 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = {
        { "80860F41", (unsigned long)&byt_i2c_dev_desc },
        { "INT33B2", },
 
+       { "INT3430", (unsigned long)&lpt_dev_desc },
+       { "INT3431", (unsigned long)&lpt_dev_desc },
+       { "INT3432", (unsigned long)&lpt_dev_desc },
+       { "INT3433", (unsigned long)&lpt_dev_desc },
+       { "INT3434", (unsigned long)&lpt_uart_dev_desc },
+       { "INT3435", (unsigned long)&lpt_uart_dev_desc },
+       { "INT3436", (unsigned long)&lpt_sdio_dev_desc },
+       { "INT3437", },
+
        { }
 };
 
index 8a4cfc7e71f0f83cc7a8644e75eab627c8e3c52d..dbfe49e5fd63cc179559b2c5caee57d27324c012 100644 (file)
@@ -111,7 +111,7 @@ int acpi_create_platform_device(struct acpi_device *adev,
        pdevinfo.id = -1;
        pdevinfo.res = resources;
        pdevinfo.num_res = count;
-       pdevinfo.acpi_node.handle = adev->handle;
+       pdevinfo.acpi_node.companion = adev;
        pdev = platform_device_register_full(&pdevinfo);
        if (IS_ERR(pdev)) {
                dev_err(&adev->dev, "platform device creation failed: %ld\n",
index fb848378d5824691af9e84b4f100a01e47df7d39..078c4f7fe2dd97c42d3e13ea11ebde804e3ae4f0 100644 (file)
@@ -75,39 +75,6 @@ static struct acpi_blacklist_item acpi_blacklist[] __initdata = {
        {""}
 };
 
-#if    CONFIG_ACPI_BLACKLIST_YEAR
-
-static int __init blacklist_by_year(void)
-{
-       int year;
-
-       /* Doesn't exist? Likely an old system */
-       if (!dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL)) {
-               printk(KERN_ERR PREFIX "no DMI BIOS year, "
-                       "acpi=force is required to enable ACPI\n" );
-               return 1;
-       }
-       /* 0? Likely a buggy new BIOS */
-       if (year == 0) {
-               printk(KERN_ERR PREFIX "DMI BIOS year==0, "
-                       "assuming ACPI-capable machine\n" );
-               return 0;
-       }
-       if (year < CONFIG_ACPI_BLACKLIST_YEAR) {
-               printk(KERN_ERR PREFIX "BIOS age (%d) fails cutoff (%d), "
-                      "acpi=force is required to enable ACPI\n",
-                      year, CONFIG_ACPI_BLACKLIST_YEAR);
-               return 1;
-       }
-       return 0;
-}
-#else
-static inline int blacklist_by_year(void)
-{
-       return 0;
-}
-#endif
-
 int __init acpi_blacklisted(void)
 {
        int i = 0;
@@ -166,8 +133,6 @@ int __init acpi_blacklisted(void)
                }
        }
 
-       blacklisted += blacklist_by_year();
-
        dmi_check_system(acpi_osi_dmi_table);
 
        return blacklisted;
index d42b2fb5a7e94131ce2633ef95f4d7631729325f..b3480cf7db1a1d1eba0e8d884cb456f92b76b660 100644 (file)
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
-#include <linux/device.h>
+#include <linux/acpi.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
 
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
-
 #include "internal.h"
 
 #define _COMPONENT     ACPI_POWER_COMPONENT
@@ -548,7 +544,7 @@ static int acpi_dev_pm_get_state(struct device *dev, struct acpi_device *adev,
  */
 int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p, int d_max_in)
 {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+       acpi_handle handle = ACPI_HANDLE(dev);
        struct acpi_device *adev;
        int ret, d_min, d_max;
 
@@ -656,7 +652,7 @@ int acpi_pm_device_run_wake(struct device *phys_dev, bool enable)
        if (!device_run_wake(phys_dev))
                return -EINVAL;
 
-       handle = DEVICE_ACPI_HANDLE(phys_dev);
+       handle = ACPI_HANDLE(phys_dev);
        if (!handle || acpi_bus_get_device(handle, &adev)) {
                dev_dbg(phys_dev, "ACPI handle without context in %s!\n",
                        __func__);
@@ -700,7 +696,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
        if (!device_can_wakeup(dev))
                return -EINVAL;
 
-       handle = DEVICE_ACPI_HANDLE(dev);
+       handle = ACPI_HANDLE(dev);
        if (!handle || acpi_bus_get_device(handle, &adev)) {
                dev_dbg(dev, "ACPI handle without context in %s!\n", __func__);
                return -ENODEV;
@@ -722,7 +718,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
  */
 struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
 {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+       acpi_handle handle = ACPI_HANDLE(dev);
        struct acpi_device *adev;
 
        return handle && !acpi_bus_get_device(handle, &adev) ? adev : NULL;
index d5309fd494589b4d1596cb1c03576ed1e7ef252e..ba5b56db9d27c7fafa3b19c6d1f5d2549308aea6 100644 (file)
@@ -173,9 +173,10 @@ static void start_transaction(struct acpi_ec *ec)
 static void advance_transaction(struct acpi_ec *ec, u8 status)
 {
        unsigned long flags;
-       struct transaction *t = ec->curr;
+       struct transaction *t;
 
        spin_lock_irqsave(&ec->lock, flags);
+       t = ec->curr;
        if (!t)
                goto unlock;
        if (t->wlen > t->wi) {
index 10f0f40587bb73309eee9e959fc1049cbaf6dc05..a22a295edb692347f16066bff26d145973ab2523 100644 (file)
@@ -197,30 +197,28 @@ static void acpi_physnode_link_name(char *buf, unsigned int node_id)
 
 int acpi_bind_one(struct device *dev, acpi_handle handle)
 {
-       struct acpi_device *acpi_dev;
-       acpi_status status;
+       struct acpi_device *acpi_dev = NULL;
        struct acpi_device_physical_node *physical_node, *pn;
        char physical_node_name[PHYSICAL_NODE_NAME_SIZE];
        struct list_head *physnode_list;
        unsigned int node_id;
        int retval = -EINVAL;
 
-       if (ACPI_HANDLE(dev)) {
+       if (ACPI_COMPANION(dev)) {
                if (handle) {
-                       dev_warn(dev, "ACPI handle is already set\n");
+                       dev_warn(dev, "ACPI companion already set\n");
                        return -EINVAL;
                } else {
-                       handle = ACPI_HANDLE(dev);
+                       acpi_dev = ACPI_COMPANION(dev);
                }
+       } else {
+               acpi_bus_get_device(handle, &acpi_dev);
        }
-       if (!handle)
+       if (!acpi_dev)
                return -EINVAL;
 
+       get_device(&acpi_dev->dev);
        get_device(dev);
-       status = acpi_bus_get_device(handle, &acpi_dev);
-       if (ACPI_FAILURE(status))
-               goto err;
-
        physical_node = kzalloc(sizeof(*physical_node), GFP_KERNEL);
        if (!physical_node) {
                retval = -ENOMEM;
@@ -242,10 +240,11 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
 
                        dev_warn(dev, "Already associated with ACPI node\n");
                        kfree(physical_node);
-                       if (ACPI_HANDLE(dev) != handle)
+                       if (ACPI_COMPANION(dev) != acpi_dev)
                                goto err;
 
                        put_device(dev);
+                       put_device(&acpi_dev->dev);
                        return 0;
                }
                if (pn->node_id == node_id) {
@@ -259,8 +258,8 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
        list_add(&physical_node->node, physnode_list);
        acpi_dev->physical_node_count++;
 
-       if (!ACPI_HANDLE(dev))
-               ACPI_HANDLE_SET(dev, acpi_dev->handle);
+       if (!ACPI_COMPANION(dev))
+               ACPI_COMPANION_SET(dev, acpi_dev);
 
        acpi_physnode_link_name(physical_node_name, node_id);
        retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
@@ -283,27 +282,21 @@ int acpi_bind_one(struct device *dev, acpi_handle handle)
        return 0;
 
  err:
-       ACPI_HANDLE_SET(dev, NULL);
+       ACPI_COMPANION_SET(dev, NULL);
        put_device(dev);
+       put_device(&acpi_dev->dev);
        return retval;
 }
 EXPORT_SYMBOL_GPL(acpi_bind_one);
 
 int acpi_unbind_one(struct device *dev)
 {
+       struct acpi_device *acpi_dev = ACPI_COMPANION(dev);
        struct acpi_device_physical_node *entry;
-       struct acpi_device *acpi_dev;
-       acpi_status status;
 
-       if (!ACPI_HANDLE(dev))
+       if (!acpi_dev)
                return 0;
 
-       status = acpi_bus_get_device(ACPI_HANDLE(dev), &acpi_dev);
-       if (ACPI_FAILURE(status)) {
-               dev_err(dev, "Oops, ACPI handle corrupt in %s()\n", __func__);
-               return -EINVAL;
-       }
-
        mutex_lock(&acpi_dev->physical_node_lock);
 
        list_for_each_entry(entry, &acpi_dev->physical_node_list, node)
@@ -316,9 +309,10 @@ int acpi_unbind_one(struct device *dev)
                        acpi_physnode_link_name(physnode_name, entry->node_id);
                        sysfs_remove_link(&acpi_dev->dev.kobj, physnode_name);
                        sysfs_remove_link(&dev->kobj, "firmware_node");
-                       ACPI_HANDLE_SET(dev, NULL);
-                       /* acpi_bind_one() increase refcnt by one. */
+                       ACPI_COMPANION_SET(dev, NULL);
+                       /* Drop references taken by acpi_bind_one(). */
                        put_device(dev);
+                       put_device(&acpi_dev->dev);
                        kfree(entry);
                        break;
                }
@@ -328,6 +322,15 @@ int acpi_unbind_one(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(acpi_unbind_one);
 
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr)
+{
+       struct acpi_device *adev;
+
+       if (!acpi_bus_get_device(acpi_get_child(parent, addr), &adev))
+               ACPI_COMPANION_SET(dev, adev);
+}
+EXPORT_SYMBOL_GPL(acpi_preset_companion);
+
 static int acpi_platform_notify(struct device *dev)
 {
        struct acpi_bus_type *type = acpi_get_bus_type(dev);
index 56f05869b08df2ab89bafd661d97b282c36d0b8b..0703bff5e60ecaf2f9d207f9ffc1691d50380384 100644 (file)
@@ -575,6 +575,7 @@ static int acpi_pci_root_add(struct acpi_device *device,
                dev_err(&device->dev,
                        "Bus %04x:%02x not present in PCI namespace\n",
                        root->segment, (unsigned int)root->secondary.start);
+               device->driver_data = NULL;
                result = -ENODEV;
                goto end;
        }
index 55f9dedbbf9fedc5aae2cf21852c7474514cd0d1..15daa21fcd056cf75b50eccf34a08482e0e24e8a 100644 (file)
@@ -289,24 +289,17 @@ void acpi_bus_device_eject(void *data, u32 ost_src)
 {
        struct acpi_device *device = data;
        acpi_handle handle = device->handle;
-       struct acpi_scan_handler *handler;
        u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
        int error;
 
        lock_device_hotplug();
        mutex_lock(&acpi_scan_lock);
 
-       handler = device->handler;
-       if (!handler || !handler->hotplug.enabled) {
-               put_device(&device->dev);
-               goto err_support;
-       }
-
        if (ost_src == ACPI_NOTIFY_EJECT_REQUEST)
                acpi_evaluate_hotplug_ost(handle, ACPI_NOTIFY_EJECT_REQUEST,
                                          ACPI_OST_SC_EJECT_IN_PROGRESS, NULL);
 
-       if (handler->hotplug.mode == AHM_CONTAINER)
+       if (device->handler && device->handler->hotplug.mode == AHM_CONTAINER)
                kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
 
        error = acpi_scan_hot_remove(device);
@@ -411,8 +404,7 @@ static void acpi_hotplug_notify_cb(acpi_handle handle, u32 type, void *data)
                break;
        case ACPI_NOTIFY_EJECT_REQUEST:
                acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
-               status = acpi_bus_get_device(handle, &adev);
-               if (ACPI_FAILURE(status))
+               if (acpi_bus_get_device(handle, &adev))
                        goto err_out;
 
                get_device(&adev->dev);
@@ -1997,6 +1989,7 @@ static int acpi_bus_scan_fixed(void)
                if (result)
                        return result;
 
+               device->flags.match_driver = true;
                result = device_attach(&device->dev);
                if (result < 0)
                        return result;
@@ -2013,6 +2006,7 @@ static int acpi_bus_scan_fixed(void)
                if (result)
                        return result;
 
+               device->flags.match_driver = true;
                result = device_attach(&device->dev);
        }
 
index 18dbdff4656e7d6c6b8f665e9fcdb2e22067cf98..995e91bcb97b7b4d5f21d585de1f5a7e46bd9ebd 100644 (file)
@@ -81,13 +81,6 @@ module_param(brightness_switch_enabled, bool, 0644);
 static bool allow_duplicates;
 module_param(allow_duplicates, bool, 0644);
 
-/*
- * Some BIOSes claim they use minimum backlight at boot,
- * and this may bring dimming screen after boot
- */
-static bool use_bios_initial_backlight = 1;
-module_param(use_bios_initial_backlight, bool, 0644);
-
 /*
  * For Windows 8 systems: if set true and the GPU driver has
  * registered a backlight interface, skip registering ACPI video's.
@@ -406,12 +399,6 @@ static int __init video_set_bqc_offset(const struct dmi_system_id *d)
        return 0;
 }
 
-static int video_ignore_initial_backlight(const struct dmi_system_id *d)
-{
-       use_bios_initial_backlight = 0;
-       return 0;
-}
-
 static struct dmi_system_id video_dmi_table[] __initdata = {
        /*
         * Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121
@@ -456,54 +443,6 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
                DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7720"),
                },
        },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP Folio 13-2000",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13 - 2000 Notebook PC"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "Fujitsu E753",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "FUJITSU"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E753"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP Pavilion dm4",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dm4 Notebook PC"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP Pavilion g6 Notebook PC",
-        .matches = {
-                DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-                DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion g6 Notebook PC"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP 1000 Notebook PC",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "HP 1000 Notebook PC"),
-               },
-       },
-       {
-        .callback = video_ignore_initial_backlight,
-        .ident = "HP Pavilion m4",
-        .matches = {
-               DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-               DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion m4 Notebook PC"),
-               },
-       },
        {}
 };
 
@@ -839,20 +778,18 @@ acpi_video_init_brightness(struct acpi_video_device *device)
        if (!device->cap._BQC)
                goto set_level;
 
-       if (use_bios_initial_backlight) {
-               level = acpi_video_bqc_value_to_level(device, level_old);
-               /*
-                * On some buggy laptops, _BQC returns an uninitialized
-                * value when invoked for the first time, i.e.
-                * level_old is invalid (no matter whether it's a level
-                * or an index). Set the backlight to max_level in this case.
-                */
-               for (i = 2; i < br->count; i++)
-                       if (level == br->levels[i])
-                               break;
-               if (i == br->count || !level)
-                       level = max_level;
-       }
+       level = acpi_video_bqc_value_to_level(device, level_old);
+       /*
+        * On some buggy laptops, _BQC returns an uninitialized
+        * value when invoked for the first time, i.e.
+        * level_old is invalid (no matter whether it's a level
+        * or an index). Set the backlight to max_level in this case.
+        */
+       for (i = 2; i < br->count; i++)
+               if (level == br->levels[i])
+                       break;
+       if (i == br->count || !level)
+               level = max_level;
 
 set_level:
        result = acpi_video_device_lcd_set_level(device, level);
index ab714d2ad978644752ca3c9bdff74ae1c9f63934..4372cfa883c9c36cf2624f88186ef3e0e7b27d31 100644 (file)
@@ -185,7 +185,7 @@ void ata_acpi_bind_port(struct ata_port *ap)
        if (libata_noacpi || ap->flags & ATA_FLAG_ACPI_SATA || !host_handle)
                return;
 
-       ACPI_HANDLE_SET(&ap->tdev, acpi_get_child(host_handle, ap->port_no));
+       acpi_preset_companion(&ap->tdev, host_handle, ap->port_no);
 
        if (ata_acpi_gtm(ap, &ap->__acpi_init_gtm) == 0)
                ap->pflags |= ATA_PFLAG_INIT_GTM_VALID;
@@ -222,7 +222,7 @@ void ata_acpi_bind_dev(struct ata_device *dev)
                parent_handle = port_handle;
        }
 
-       ACPI_HANDLE_SET(&dev->tdev, acpi_get_child(parent_handle, adr));
+       acpi_preset_companion(&dev->tdev, parent_handle, adr);
 
        register_hotplug_dock_device(ata_dev_acpi_handle(dev),
                                     &ata_acpi_dev_dock_ops, dev, NULL, NULL);
index 853f610af28fbc9dff0ee59d1fa1e6688cbfd732..e88690ebfd827b8a02558c4b611b474a4f8c4a1e 100644 (file)
@@ -396,8 +396,7 @@ dma_xfer(struct arasan_cf_dev *acdev, dma_addr_t src, dma_addr_t dest, u32 len)
        struct dma_async_tx_descriptor *tx;
        struct dma_chan *chan = acdev->dma_chan;
        dma_cookie_t cookie;
-       unsigned long flags = DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-               DMA_COMPL_SKIP_DEST_UNMAP;
+       unsigned long flags = DMA_PREP_INTERRUPT;
        int ret = 0;
 
        tx = chan->device->device_prep_dma_memcpy(chan, dest, src, len, flags);
index 47051cd251132ce97752bc8f8f9748be272c0f35..3a94b799f16640eb6a8e34ef2df78a31e3085e9c 100644 (file)
@@ -432,7 +432,7 @@ struct platform_device *platform_device_register_full(
                goto err_alloc;
 
        pdev->dev.parent = pdevinfo->parent;
-       ACPI_HANDLE_SET(&pdev->dev, pdevinfo->acpi_node.handle);
+       ACPI_COMPANION_SET(&pdev->dev, pdevinfo->acpi_node.companion);
 
        if (pdevinfo->dma_mask) {
                /*
@@ -463,7 +463,7 @@ struct platform_device *platform_device_register_full(
        ret = platform_device_add(pdev);
        if (ret) {
 err:
-               ACPI_HANDLE_SET(&pdev->dev, NULL);
+               ACPI_COMPANION_SET(&pdev->dev, NULL);
                kfree(pdev->dev.dma_mask);
 
 err_alloc:
index c12e9b9556be7d5d49d5d78c9de09eb39ce8a0f8..1b41fca3d65a54545c6c124e0df696998c29a1af 100644 (file)
@@ -1350,6 +1350,9 @@ static int device_prepare(struct device *dev, pm_message_t state)
 
        device_unlock(dev);
 
+       if (error)
+               pm_runtime_put(dev);
+
        return error;
 }
 
index b5d842370cc9e75bfcf67f38ca6c03ddb259e8fe..ea192ec029c45bf7354d8184b44e1eb460cdbf77 100644 (file)
@@ -223,7 +223,7 @@ static void null_softirq_done_fn(struct request *rq)
        blk_end_request_all(rq, 0);
 }
 
-#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#ifdef CONFIG_SMP
 
 static void null_ipi_cmd_end_io(void *data)
 {
@@ -260,7 +260,7 @@ static void null_cmd_end_ipi(struct nullb_cmd *cmd)
        put_cpu();
 }
 
-#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+#endif /* CONFIG_SMP */
 
 static inline void null_handle_cmd(struct nullb_cmd *cmd)
 {
@@ -270,7 +270,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
                end_cmd(cmd);
                break;
        case NULL_IRQ_SOFTIRQ:
-#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#ifdef CONFIG_SMP
                null_cmd_end_ipi(cmd);
 #else
                end_cmd(cmd);
@@ -571,7 +571,7 @@ static int __init null_init(void)
 {
        unsigned int i;
 
-#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+#if !defined(CONFIG_SMP)
        if (irqmode == NULL_IRQ_SOFTIRQ) {
                pr_warn("null_blk: softirq completions not available.\n");
                pr_warn("null_blk: using direct completions.\n");
index 588479d58f52f1496d8395c99ec410a3c1a10da3..6a680d4de7f1c3dcfa7999e45efc98b4b14d99fc 100644 (file)
@@ -199,15 +199,16 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 
        spin_lock_irqsave(&vblk->vq_lock, flags);
        if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+               virtqueue_kick(vblk->vq);
                spin_unlock_irqrestore(&vblk->vq_lock, flags);
                blk_mq_stop_hw_queue(hctx);
-               virtqueue_kick(vblk->vq);
                return BLK_MQ_RQ_QUEUE_BUSY;
        }
-       spin_unlock_irqrestore(&vblk->vq_lock, flags);
 
        if (last)
                virtqueue_kick(vblk->vq);
+
+       spin_unlock_irqrestore(&vblk->vq_lock, flags);
        return BLK_MQ_RQ_QUEUE_OK;
 }
 
index 94c0c74434eac641a3ddfcd18324913ecedeaafa..1a65838888cdbb37ec2551fd132c7c91cc66c511 100644 (file)
@@ -33,6 +33,15 @@ config TCG_TIS
          from within Linux.  To compile this driver as a module, choose
          M here; the module will be called tpm_tis.
 
+config TCG_TIS_I2C_ATMEL
+       tristate "TPM Interface Specification 1.2 Interface (I2C - Atmel)"
+       depends on I2C
+       ---help---
+         If you have an Atmel I2C TPM security chip say Yes and it will be
+         accessible from within Linux.
+         To compile this driver as a module, choose M here; the module will
+         be called tpm_i2c_atmel.
+
 config TCG_TIS_I2C_INFINEON
        tristate "TPM Interface Specification 1.2 Interface (I2C - Infineon)"
        depends on I2C
@@ -42,7 +51,17 @@ config TCG_TIS_I2C_INFINEON
          Specification 0.20 say Yes and it will be accessible from within
          Linux.
          To compile this driver as a module, choose M here; the module
-         will be called tpm_tis_i2c_infineon.
+         will be called tpm_i2c_infineon.
+
+config TCG_TIS_I2C_NUVOTON
+       tristate "TPM Interface Specification 1.2 Interface (I2C - Nuvoton)"
+       depends on I2C
+       ---help---
+         If you have a TPM security chip with an I2C interface from
+         Nuvoton Technology Corp. say Yes and it will be accessible
+         from within Linux.
+         To compile this driver as a module, choose M here; the module
+         will be called tpm_i2c_nuvoton.
 
 config TCG_NSC
        tristate "National Semiconductor TPM Interface"
@@ -82,14 +101,14 @@ config TCG_IBMVTPM
          as a module, choose M here; the module will be called tpm_ibmvtpm.
 
 config TCG_ST33_I2C
-        tristate "STMicroelectronics ST33 I2C TPM"
-        depends on I2C
-        depends on GPIOLIB
-        ---help---
-        If you have a TPM security chip from STMicroelectronics working with
-        an I2C bus say Yes and it will be accessible from within Linux.
-        To compile this driver as a module, choose M here; the module will be
-        called tpm_stm_st33_i2c.
+       tristate "STMicroelectronics ST33 I2C TPM"
+       depends on I2C
+       depends on GPIOLIB
+       ---help---
+         If you have a TPM security chip from STMicroelectronics working with
+         an I2C bus say Yes and it will be accessible from within Linux.
+         To compile this driver as a module, choose M here; the module will be
+         called tpm_stm_st33_i2c.
 
 config TCG_XEN
        tristate "XEN TPM Interface"
index eb41ff97d0ad13f577ff942364ffe8314a5a2999..b80a4000daeee7b72f7f945637544312473edeb6 100644 (file)
@@ -2,17 +2,20 @@
 # Makefile for the kernel tpm device drivers.
 #
 obj-$(CONFIG_TCG_TPM) += tpm.o
+tpm-y := tpm-interface.o
+tpm-$(CONFIG_ACPI) += tpm_ppi.o
+
 ifdef CONFIG_ACPI
-       obj-$(CONFIG_TCG_TPM) += tpm_bios.o
-       tpm_bios-objs += tpm_eventlog.o tpm_acpi.o tpm_ppi.o
+       tpm-y += tpm_eventlog.o tpm_acpi.o
 else
 ifdef CONFIG_TCG_IBMVTPM
-       obj-$(CONFIG_TCG_TPM) += tpm_bios.o
-       tpm_bios-objs += tpm_eventlog.o tpm_of.o
+       tpm-y += tpm_eventlog.o tpm_of.o
 endif
 endif
 obj-$(CONFIG_TCG_TIS) += tpm_tis.o
+obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o
 obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o
+obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o
 obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
 obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
new file mode 100644 (file)
index 0000000..6ae41d3
--- /dev/null
@@ -0,0 +1,1554 @@
+/*
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Authors:
+ * Leendert van Doorn <leendert@watson.ibm.com>
+ * Dave Safford <safford@watson.ibm.com>
+ * Reiner Sailer <sailer@watson.ibm.com>
+ * Kylene Hall <kjhall@us.ibm.com>
+ *
+ * Maintained by: <tpmdd-devel@lists.sourceforge.net>
+ *
+ * Device driver for TCG/TCPA TPM (trusted platform module).
+ * Specifications at www.trustedcomputinggroup.org
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * Note, the TPM chip is not interrupt driven (only polling)
+ * and can have very long timeouts (minutes!). Hence the unusual
+ * calls to msleep.
+ *
+ */
+
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/freezer.h>
+
+#include "tpm.h"
+#include "tpm_eventlog.h"
+
+enum tpm_duration {            /* duration class assigned to a TPM ordinal */
+       TPM_SHORT = 0,          /* index into chip->vendor.duration[] */
+       TPM_MEDIUM = 1,         /* index into chip->vendor.duration[] */
+       TPM_LONG = 2,           /* index into chip->vendor.duration[] */
+       TPM_UNDEFINED,          /* no class known; tpm_calc_ordinal_duration() uses its default */
+};
+
+#define TPM_MAX_ORDINAL 243
+#define TSC_MAX_ORDINAL 12
+#define TPM_PROTECTED_COMMAND 0x00
+#define TPM_CONNECTION_COMMAND 0x40
+
+/*
+ * Bug workaround - some TPMs don't flush the most
+ * recently changed pcr on suspend, so force the flush
+ * with an extend to the selected _unused_ non-volatile pcr.
+ *
+ * The variable is unsigned so that its type matches the "uint" parameter
+ * type it is registered with below.
+ */
+static unsigned int tpm_suspend_pcr;
+module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644);
+MODULE_PARM_DESC(suspend_pcr,
+                "PCR to use for dummy writes to facilitate flush on suspend.");
+
+static LIST_HEAD(tpm_chip_list);
+static DEFINE_SPINLOCK(driver_lock);
+static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES);
+
+/*
+ * Array with one entry per ordinal defining the maximum amount
+ * of time the chip could take to return the result.  The ordinal
+ * designation of short, medium or long is defined in a table in
+ * TCG Specification TPM Main Part 2 TPM Structures Section 17. The
+ * values of the SHORT, MEDIUM, and LONG durations are retrieved
+ * from the chip during initialization with a call to tpm_get_timeouts.
+ */
+static const u8 tpm_ordinal_duration[TPM_MAX_ORDINAL] = {
+       TPM_UNDEFINED,          /* 0 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 5 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 10 */
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_LONG,
+       TPM_LONG,
+       TPM_MEDIUM,             /* 15 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_LONG,
+       TPM_SHORT,              /* 20 */
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_SHORT,              /* 25 */
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_MEDIUM,             /* 30 */
+       TPM_LONG,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,              /* 35 */
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_MEDIUM,             /* 40 */
+       TPM_LONG,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,              /* 45 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_LONG,
+       TPM_MEDIUM,             /* 50 */
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 55 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_MEDIUM,             /* 60 */
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_MEDIUM,             /* 65 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 70 */
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 75 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_LONG,               /* 80 */
+       TPM_UNDEFINED,
+       TPM_MEDIUM,
+       TPM_LONG,
+       TPM_SHORT,
+       TPM_UNDEFINED,          /* 85 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 90 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,          /* 95 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_MEDIUM,             /* 100 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 105 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 110 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,              /* 115 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_LONG,               /* 120 */
+       TPM_LONG,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_SHORT,
+       TPM_SHORT,              /* 125 */
+       TPM_SHORT,
+       TPM_LONG,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,              /* 130 */
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,          /* 135 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 140 */
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 145 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 150 */
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,          /* 155 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 160 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 165 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_LONG,               /* 170 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 175 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_MEDIUM,             /* 180 */
+       TPM_SHORT,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_MEDIUM,             /* 185 */
+       TPM_SHORT,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 190 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 195 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 200 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,
+       TPM_SHORT,              /* 205 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_MEDIUM,             /* 210 */
+       TPM_UNDEFINED,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,          /* 215 */
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,
+       TPM_SHORT,              /* 220 */
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_SHORT,
+       TPM_UNDEFINED,          /* 225 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 230 */
+       TPM_LONG,
+       TPM_MEDIUM,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,          /* 235 */
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_UNDEFINED,
+       TPM_SHORT,              /* 240 */
+       TPM_UNDEFINED,
+       TPM_MEDIUM,
+};
+
+/*
+ * Queue the chip's work item.  @ptr is the struct tpm_chip pointer carried
+ * as an unsigned long.
+ * NOTE(review): the signature looks like a timer callback; the code that
+ * installs it is not visible here - confirm.
+ */
+static void user_reader_timeout(unsigned long ptr)
+{
+       schedule_work(&((struct tpm_chip *) ptr)->work);
+}
+
+/*
+ * Work handler: with buffer_mutex held, clear the data_pending count and
+ * wipe all TPM_BUFSIZE bytes of the chip's data buffer.
+ */
+static void timeout_work(struct work_struct *work)
+{
+       struct tpm_chip *owner;
+
+       /* The work item is embedded in the chip, so step back to the chip. */
+       owner = container_of(work, struct tpm_chip, work);
+
+       mutex_lock(&owner->buffer_mutex);
+       atomic_set(&owner->data_pending, 0);
+       memset(owner->data_buffer, 0, TPM_BUFSIZE);
+       mutex_unlock(&owner->buffer_mutex);
+}
+
+/**
+ * tpm_calc_ordinal_duration - Upper bound on how long a command may run.
+ * @chip: TPM chip whose vendor.duration[] values are consulted.
+ * @ordinal: TPM command ordinal in host byte order.
+ *
+ * Returns the maximum number of jiffies to wait.  An ordinal that is not
+ * covered by tpm_ordinal_duration[], whose table entry is TPM_UNDEFINED,
+ * or whose stored duration is not positive gets a two minute default.
+ */
+unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip,
+                                          u32 ordinal)
+{
+       int duration_idx = TPM_UNDEFINED;
+       int duration = 0;
+
+       /*
+        * The table only covers ordinals below TPM_MAX_ORDINAL, which all
+        * have a zero top byte (TPM_PROTECTED_COMMAND).  An ordinal carrying
+        * TPM_CONNECTION_COMMAND (0x40) in its top byte is at least
+        * 0x40000000 and so can never be below TSC_MAX_ORDINAL; such
+        * ordinals always take the default below.
+        */
+       if (ordinal < TPM_MAX_ORDINAL)
+               duration_idx = tpm_ordinal_duration[ordinal];
+
+       if (duration_idx != TPM_UNDEFINED)
+               duration = chip->vendor.duration[duration_idx];
+       if (duration <= 0)
+               return 2 * 60 * HZ;
+
+       return duration;
+}
+EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration);
+
+/**
+ * tpm_transmit - Internal kernel interface to transmit TPM commands.
+ * @chip: TPM chip to talk to.
+ * @buf: Command buffer; the same buffer is handed to the vendor recv hook
+ *       to receive the response.
+ * @bufsiz: Size of @buf; values above TPM_BUFSIZE are clamped.
+ *
+ * The command length and ordinal are taken from the big-endian header
+ * fields at byte offsets 2 and 6 of @buf.  Unless the chip has an IRQ
+ * configured, completion is polled every TPM_TIMEOUT ms for at most
+ * tpm_calc_ordinal_duration() jiffies.
+ *
+ * Returns the result of the vendor recv hook, or a negative errno:
+ * -EINVAL if @bufsiz cannot hold a command header, -ENODATA if the header
+ * announces a zero length, -E2BIG if it announces more than @bufsiz,
+ * -ECANCELED if the request was canceled, -ETIME on timeout, or the error
+ * reported by the vendor send hook.
+ */
+static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
+                           size_t bufsiz)
+{
+       ssize_t rc;
+       u32 count, ordinal;
+       unsigned long stop;
+
+       if (bufsiz > TPM_BUFSIZE)
+               bufsiz = TPM_BUFSIZE;
+
+       /*
+        * The header reads below touch bytes 2..9 of @buf, so refuse buffers
+        * that cannot hold a full header.
+        * NOTE(review): assumes TPM_HEADER_SIZE (tpm.h) is at least 10 - confirm.
+        */
+       if (bufsiz < TPM_HEADER_SIZE)
+               return -EINVAL;
+
+       count = be32_to_cpu(*((__be32 *) (buf + 2)));
+       ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
+       if (count == 0)
+               return -ENODATA;
+       if (count > bufsiz) {
+               dev_err(chip->dev,
+                       "invalid count value %x %zx\n", count, bufsiz);
+               return -E2BIG;
+       }
+
+       /* tpm_mutex is held across the whole send/poll/receive exchange. */
+       mutex_lock(&chip->tpm_mutex);
+
+       rc = chip->vendor.send(chip, (u8 *) buf, count);
+       if (rc < 0) {
+               dev_err(chip->dev,
+                       "tpm_transmit: tpm_send: error %zd\n", rc);
+               goto out;
+       }
+
+       /* With an IRQ configured the status polling loop is skipped. */
+       if (chip->vendor.irq)
+               goto out_recv;
+
+       stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal);
+       do {
+               u8 status = chip->vendor.status(chip);
+               if ((status & chip->vendor.req_complete_mask) ==
+                   chip->vendor.req_complete_val)
+                       goto out_recv;
+
+               if (chip->vendor.req_canceled(chip, status)) {
+                       dev_err(chip->dev, "Operation Canceled\n");
+                       rc = -ECANCELED;
+                       goto out;
+               }
+
+               msleep(TPM_TIMEOUT);    /* CHECK */
+               rmb();
+       } while (time_before(jiffies, stop));
+
+       /* Deadline passed without completion: cancel and report -ETIME. */
+       chip->vendor.cancel(chip);
+       dev_err(chip->dev, "Operation Timed out\n");
+       rc = -ETIME;
+       goto out;
+
+out_recv:
+       rc = chip->vendor.recv(chip, (u8 *) buf, bufsiz);
+       if (rc < 0)
+               dev_err(chip->dev,
+                       "tpm_transmit: tpm_recv: error %zd\n", rc);
+out:
+       mutex_unlock(&chip->tpm_mutex);
+       return rc;
+}
+
+#define TPM_DIGEST_SIZE 20
+#define TPM_RET_CODE_IDX 6
+
+enum tpm_capabilities {                /* capability area IDs, stored big-endian */
+       TPM_CAP_FLAG = cpu_to_be32(4),          /* tpm_getcap() uses this for TPM_CAP_FLAG_PERM/_VOL */
+       TPM_CAP_PROP = cpu_to_be32(5),          /* tpm_getcap() uses this for the other sub-caps */
+       CAP_VERSION_1_1 = cpu_to_be32(0x06),    /* sent by tpm_getcap() with an empty sub-cap */
+       CAP_VERSION_1_2 = cpu_to_be32(0x1A)     /* sent by tpm_getcap() with an empty sub-cap */
+};
+
+enum tpm_sub_capabilities {    /* sub-capability IDs, stored big-endian */
+       TPM_CAP_PROP_PCR = cpu_to_be32(0x101),
+       TPM_CAP_PROP_MANUFACTURER = cpu_to_be32(0x103),
+       TPM_CAP_FLAG_PERM = cpu_to_be32(0x108),         /* queried under TPM_CAP_FLAG */
+       TPM_CAP_FLAG_VOL = cpu_to_be32(0x109),          /* queried under TPM_CAP_FLAG */
+       TPM_CAP_PROP_OWNER = cpu_to_be32(0x111),
+       TPM_CAP_PROP_TIS_TIMEOUT = cpu_to_be32(0x115),  /* read by tpm_get_timeouts() */
+       TPM_CAP_PROP_TIS_DURATION = cpu_to_be32(0x120), /* read by tpm_get_timeouts() */
+
+};
+
+/*
+ * Send @cmd through tpm_transmit() and decode the TPM return code.
+ *
+ * @chip: TPM chip to talk to.
+ * @cmd:  command buffer; overwritten with the response.
+ * @len:  buffer size handed to tpm_transmit().
+ * @desc: text appended to the error message, or NULL to stay silent.
+ *
+ * Returns the negative errno from tpm_transmit(), -EFAULT when the response
+ * is shorter than TPM_HEADER_SIZE, and otherwise the return code from the
+ * response header (0 means success).
+ */
+static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd,
+                           int len, const char *desc)
+{
+       int tpm_rc;
+
+       len = tpm_transmit(chip, (u8 *) cmd, len);
+       if (len < 0)
+               return len;
+       if (len < TPM_HEADER_SIZE)
+               return -EFAULT;
+
+       tpm_rc = be32_to_cpu(cmd->header.out.return_code);
+       if (tpm_rc && desc)
+               dev_err(chip->dev, "A TPM error (%d) occurred %s\n", tpm_rc,
+                       desc);
+
+       return tpm_rc;
+}
+
+#define TPM_INTERNAL_RESULT_SIZE 200
+#define TPM_TAG_RQU_COMMAND cpu_to_be16(193)
+#define TPM_ORD_GET_CAP cpu_to_be32(101)
+#define TPM_ORD_GET_RANDOM cpu_to_be32(70)
+
+static const struct tpm_input_header tpm_getcap_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(22),
+       .ordinal = TPM_ORD_GET_CAP
+};
+
+/**
+ * tpm_getcap - Query one capability value from the TPM.
+ * @dev: Device whose driver data is the struct tpm_chip to query.
+ * @subcap_id: Big-endian capability or sub-capability ID.  CAP_VERSION_1_1
+ *             and CAP_VERSION_1_2 are sent as the capability itself with an
+ *             empty sub-capability; TPM_CAP_FLAG_PERM and TPM_CAP_FLAG_VOL
+ *             are queried under TPM_CAP_FLAG; everything else under
+ *             TPM_CAP_PROP.
+ * @cap: Receives the capability payload; written only on success.
+ * @desc: Text appended to the error message, or NULL to stay silent.
+ *
+ * Returns 0 on success, otherwise the value reported by transmit_cmd()
+ * (a negative errno or a TPM return code).
+ */
+ssize_t tpm_getcap(struct device *dev, __be32 subcap_id, cap_t *cap,
+                  const char *desc)
+{
+       struct tpm_cmd_t tpm_cmd;
+       int rc;
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       tpm_cmd.header.in = tpm_getcap_header;
+       if (subcap_id == CAP_VERSION_1_1 || subcap_id == CAP_VERSION_1_2) {
+               tpm_cmd.params.getcap_in.cap = subcap_id;
+               /* subcap field not necessary */
+               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(0);
+               /*
+                * Shorten the command by the omitted subcap field.  The
+                * length is big-endian, so convert it to CPU order before
+                * doing arithmetic on it and convert the result back.
+                */
+               tpm_cmd.header.in.length =
+                       cpu_to_be32(be32_to_cpu(tpm_cmd.header.in.length) -
+                                   (u32) sizeof(__be32));
+       } else {
+               if (subcap_id == TPM_CAP_FLAG_PERM ||
+                   subcap_id == TPM_CAP_FLAG_VOL)
+                       tpm_cmd.params.getcap_in.cap = TPM_CAP_FLAG;
+               else
+                       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+               tpm_cmd.params.getcap_in.subcap = subcap_id;
+       }
+       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc);
+       if (!rc)
+               *cap = tpm_cmd.params.getcap_out.cap;
+       return rc;
+}
+
+/**
+ * tpm_gen_interrupt - Issue a TPM_CAP_PROP_TIS_TIMEOUT capability query.
+ * @chip: TPM chip to send the command to.
+ *
+ * The response and the status of the command are deliberately ignored;
+ * only the act of running a command matters here.
+ * NOTE(review): going by the name, callers use this to provoke an
+ * interrupt from the chip - confirm against the callers.
+ */
+void tpm_gen_interrupt(struct tpm_chip *chip)
+{
+       struct  tpm_cmd_t tpm_cmd;
+
+       tpm_cmd.header.in = tpm_getcap_header;
+       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
+
+       transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+                       "attempting to determine the timeouts");
+}
+EXPORT_SYMBOL_GPL(tpm_gen_interrupt);
+
+#define TPM_ORD_STARTUP cpu_to_be32(153)
+#define TPM_ST_CLEAR cpu_to_be16(1)
+#define TPM_ST_STATE cpu_to_be16(2)
+#define TPM_ST_DEACTIVATED cpu_to_be16(3)
+static const struct tpm_input_header tpm_startup_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(12),
+       .ordinal = TPM_ORD_STARTUP
+};
+
+/*
+ * Send a TPM_ORD_STARTUP command carrying @startup_type (big-endian, one of
+ * the TPM_ST_* values).  Returns whatever transmit_cmd() reports: 0 on
+ * success, a negative errno, or a TPM return code.
+ */
+static int tpm_startup(struct tpm_chip *chip, __be16 startup_type)
+{
+       struct tpm_cmd_t cmd;
+
+       cmd.header.in = tpm_startup_header;
+       cmd.params.startup_in.startup_type = startup_type;
+
+       return transmit_cmd(chip, &cmd, TPM_INTERNAL_RESULT_SIZE,
+                           "attempting to start the TPM");
+}
+
+/**
+ * tpm_get_timeouts - Read the TIS timeouts and command durations.
+ * @chip: TPM chip to query and whose vendor.timeout_* and vendor.duration[]
+ *        fields are updated.
+ *
+ * Queries TPM_CAP_PROP_TIS_TIMEOUT first.  If the TPM answers
+ * TPM_ERR_INVALID_POSTINIT, a TPM_STARTUP(ST_CLEAR) is issued and the query
+ * is repeated.  A failed timeout query is logged and skipped; the duration
+ * query (TPM_CAP_PROP_TIS_DURATION) is still attempted.
+ *
+ * Returns 0 on success, -EINVAL when a response has an unexpected return
+ * code or length, the original query status when the startup command
+ * fails, or the status of the duration query when that fails.
+ */
+int tpm_get_timeouts(struct tpm_chip *chip)
+{
+       struct tpm_cmd_t tpm_cmd;
+       struct timeout_t *timeout_cap;
+       struct duration_t *duration_cap;
+       ssize_t rc;
+       u32 timeout;
+       unsigned int scale = 1;
+
+       tpm_cmd.header.in = tpm_getcap_header;
+       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
+       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL);
+
+       if (rc == TPM_ERR_INVALID_POSTINIT) {
+               /* The TPM is not started, we are the first to talk to it.
+                  Execute a startup command. */
+               dev_info(chip->dev, "Issuing TPM_STARTUP\n");
+               if (tpm_startup(chip, TPM_ST_CLEAR))
+                       return rc;
+
+               /* The buffer now holds the startup response; rebuild the query. */
+               tpm_cmd.header.in = tpm_getcap_header;
+               tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+               tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
+               rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+                                 NULL);
+       }
+       if (rc) {
+               dev_err(chip->dev,
+                       "A TPM error (%zd) occurred attempting to determine the timeouts\n",
+                       rc);
+               goto duration;
+       }
+
+       /* Expect header + one u32 size field + four u32 timeout values. */
+       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+           be32_to_cpu(tpm_cmd.header.out.length)
+           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32))
+               return -EINVAL;
+
+       timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout;
+       /* Don't overwrite default if value is 0 */
+       timeout = be32_to_cpu(timeout_cap->a);
+       if (timeout && timeout < 1000) {
+               /* timeouts in msec rather than usec */
+               scale = 1000;
+               chip->vendor.timeout_adjusted = true;
+       }
+       if (timeout)
+               chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale);
+       timeout = be32_to_cpu(timeout_cap->b);
+       if (timeout)
+               chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale);
+       timeout = be32_to_cpu(timeout_cap->c);
+       if (timeout)
+               chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale);
+       timeout = be32_to_cpu(timeout_cap->d);
+       if (timeout)
+               chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale);
+
+duration:
+       tpm_cmd.header.in = tpm_getcap_header;
+       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
+       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
+       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION;
+
+       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+                       "attempting to determine the durations");
+       if (rc)
+               return rc;
+
+       /* Expect header + one u32 size field + three u32 duration values. */
+       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+           be32_to_cpu(tpm_cmd.header.out.length)
+           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
+               return -EINVAL;
+
+       duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
+       chip->vendor.duration[TPM_SHORT] =
+           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
+       chip->vendor.duration[TPM_MEDIUM] =
+           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium));
+       chip->vendor.duration[TPM_LONG] =
+           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long));
+
+       /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
+        * value wrong and apparently reports msecs rather than usecs. So we
+        * fix up the resulting too-small TPM_SHORT value to make things work.
+        * We also scale the TPM_MEDIUM and -_LONG values by 1000.
+        */
+       if (chip->vendor.duration[TPM_SHORT] < (HZ / 100)) {
+               chip->vendor.duration[TPM_SHORT] = HZ;
+               chip->vendor.duration[TPM_MEDIUM] *= 1000;
+               chip->vendor.duration[TPM_LONG] *= 1000;
+               chip->vendor.duration_adjusted = true;
+               dev_info(chip->dev, "Adjusting TPM timeout parameters.\n");
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tpm_get_timeouts);
+
+#define TPM_ORD_CONTINUE_SELFTEST 83
+#define CONTINUE_SELFTEST_RESULT_SIZE 10
+
+/* Fixed request header sent by tpm_continue_selftest(). */
+static struct tpm_input_header continue_selftest_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(10),
+       .ordinal = cpu_to_be32(TPM_ORD_CONTINUE_SELFTEST),
+};
+
+/**
+ * tpm_continue_selftest -- ask the TPM to continue its selftest
+ * @chip: TPM chip to use
+ *
+ * Sends the continue_selftest_header request through transmit_cmd() and
+ * hands its result straight back to the caller.
+ *
+ * Returns 0 on success, < 0 in case of fatal error or a value > 0 representing
+ * a TPM error code.
+ */
+static int tpm_continue_selftest(struct tpm_chip *chip)
+{
+       struct tpm_cmd_t request;
+
+       request.header.in = continue_selftest_header;
+
+       return transmit_cmd(chip, &request, CONTINUE_SELFTEST_RESULT_SIZE,
+                           "continue selftest");
+}
+
+/*
+ * Attribute show callback: writes "1\n" to @buf when the permanent flags
+ * returned by tpm_getcap() do not have the disable flag set, "0\n" when they
+ * do.  Writes nothing (returns 0) if the capability query fails.
+ */
+ssize_t tpm_show_enabled(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       cap_t cap;
+
+       if (tpm_getcap(dev, TPM_CAP_FLAG_PERM, &cap,
+                      "attempting to determine the permanent enabled state"))
+               return 0;
+
+       return sprintf(buf, "%d\n", !cap.perm_flags.disable);
+}
+EXPORT_SYMBOL_GPL(tpm_show_enabled);
+
+/*
+ * Attribute show callback: writes "1\n" to @buf when the permanent flags
+ * returned by tpm_getcap() do not have the deactivated flag set, "0\n" when
+ * they do.  Writes nothing (returns 0) if the capability query fails.
+ */
+ssize_t tpm_show_active(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       cap_t cap;
+
+       if (tpm_getcap(dev, TPM_CAP_FLAG_PERM, &cap,
+                      "attempting to determine the permanent active state"))
+               return 0;
+
+       return sprintf(buf, "%d\n", !cap.perm_flags.deactivated);
+}
+EXPORT_SYMBOL_GPL(tpm_show_active);
+
+/*
+ * Attribute show callback: prints the "owned" value reported by
+ * tpm_getcap(TPM_CAP_PROP_OWNER) as a decimal number followed by a newline.
+ * Writes nothing (returns 0) if the capability query fails.
+ */
+ssize_t tpm_show_owned(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       cap_t cap;
+
+       if (tpm_getcap(dev, TPM_CAP_PROP_OWNER, &cap,
+                      "attempting to determine the owner state"))
+               return 0;
+
+       return sprintf(buf, "%d\n", cap.owned);
+}
+EXPORT_SYMBOL_GPL(tpm_show_owned);
+
+/*
+ * Attribute show callback: prints the "deactivated" member of the volatile
+ * (stclear) flags reported by tpm_getcap(TPM_CAP_FLAG_VOL) as a decimal
+ * number followed by a newline.  Writes nothing (returns 0) if the
+ * capability query fails.
+ */
+ssize_t tpm_show_temp_deactivated(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       cap_t cap;
+
+       if (tpm_getcap(dev, TPM_CAP_FLAG_VOL, &cap,
+                      "attempting to determine the temporary state"))
+               return 0;
+
+       return sprintf(buf, "%d\n", cap.stclear_flags.deactivated);
+}
+EXPORT_SYMBOL_GPL(tpm_show_temp_deactivated);
+
+/*
+ * tpm_chip_find_get - return tpm_chip for given chip number
+ *
+ * Walks tpm_chip_list under rcu_read_lock() and returns the first entry
+ * whose dev_num equals @chip_num (any entry when @chip_num is TPM_ANY_NUM)
+ * and for which try_module_get() on the driver's owner succeeds.  Returns
+ * NULL when no entry qualifies.
+ */
+static struct tpm_chip *tpm_chip_find_get(int chip_num)
+{
+       struct tpm_chip *found = NULL;
+       struct tpm_chip *pos;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(pos, &tpm_chip_list, list) {
+               bool wanted = chip_num == TPM_ANY_NUM ||
+                             chip_num == pos->dev_num;
+
+               if (wanted && try_module_get(pos->dev->driver->owner)) {
+                       found = pos;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return found;
+}
+
+#define TPM_ORDINAL_PCRREAD cpu_to_be32(21)
+#define READ_PCR_RESULT_SIZE 30
+/* Request header shared by every PCR read issued from this file. */
+static struct tpm_input_header pcrread_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(14),
+       .ordinal = TPM_ORDINAL_PCRREAD
+};
+
+/*
+ * Read PCR @pcr_idx from @chip.  On success (transmit_cmd() returned 0)
+ * TPM_DIGEST_SIZE bytes of the result are copied into @res_buf; on failure
+ * @res_buf is left untouched and the transmit_cmd() result is returned.
+ */
+static int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
+{
+       struct tpm_cmd_t request;
+       int rc;
+
+       request.header.in = pcrread_header;
+       request.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx);
+
+       rc = transmit_cmd(chip, &request, READ_PCR_RESULT_SIZE,
+                         "attempting to read a pcr value");
+       if (rc)
+               return rc;
+
+       memcpy(res_buf, request.params.pcrread_out.pcr_result,
+              TPM_DIGEST_SIZE);
+       return 0;
+}
+
+/**
+ * tpm_pcr_read - read a pcr value
+ * @chip_num:  tpm idx # or ANY
+ * @pcr_idx:   pcr idx to retrieve
+ * @res_buf:   buffer receiving the PCR value; __tpm_pcr_read() copies
+ *             TPM_DIGEST_SIZE bytes into it on success
+ *
+ * The chip is looked up with tpm_chip_find_get(), which pins the driver
+ * module for the duration of the read so the chip cannot disappear if the
+ * TPM driver is not built-in; the reference is dropped again with
+ * tpm_chip_put() before returning.
+ *
+ * Returns -ENODEV when no matching chip is found, otherwise the result of
+ * __tpm_pcr_read().
+ */
+int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf)
+{
+       struct tpm_chip *chip = tpm_chip_find_get(chip_num);
+       int rc;
+
+       if (!chip)
+               return -ENODEV;
+
+       rc = __tpm_pcr_read(chip, pcr_idx, res_buf);
+       tpm_chip_put(chip);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_pcr_read);
+
+#define TPM_ORD_PCR_EXTEND cpu_to_be32(20)
+#define EXTEND_PCR_RESULT_SIZE 34
+/* Request header used for every PCR extend issued from this file. */
+static struct tpm_input_header pcrextend_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(34),
+       .ordinal = TPM_ORD_PCR_EXTEND
+};
+
+/**
+ * tpm_pcr_extend - extend pcr value with hash
+ * @chip_num:  tpm idx # or ANY
+ * @pcr_idx:   pcr idx to extend
+ * @hash:      hash value used to extend pcr value (TPM_DIGEST_SIZE bytes
+ *             are read from it)
+ *
+ * The TPM driver should be built-in, but for whatever reason it
+ * isn't, protect against the chip disappearing, by incrementing
+ * the module usage count.
+ *
+ * Returns -ENODEV when no matching chip is found, otherwise the result of
+ * transmit_cmd().
+ */
+int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
+{
+       struct tpm_chip *chip = tpm_chip_find_get(chip_num);
+       struct tpm_cmd_t request;
+       int rc;
+
+       if (!chip)
+               return -ENODEV;
+
+       request.header.in = pcrextend_header;
+       request.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
+       memcpy(request.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
+
+       rc = transmit_cmd(chip, &request, EXTEND_PCR_RESULT_SIZE,
+                         "attempting extend a PCR value");
+       tpm_chip_put(chip);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_pcr_extend);
+
+/**
+ * tpm_do_selftest - have the TPM continue its selftest and wait until it
+ *                   can receive further commands
+ * @chip: TPM chip to use
+ *
+ * Issues the continue-selftest command and then polls the chip by reading
+ * PCR 0, sleeping delay_msec (100 ms) between attempts, until the chip stops
+ * answering TPM_WARN_DOING_SELFTEST or the loop budget derived from
+ * tpm_calc_ordinal_duration() is used up.
+ *
+ * Returns 0 on success, < 0 in case of fatal error or a value > 0 representing
+ * a TPM error code.  In particular: 0 is also returned when the chip answers
+ * TPM_ERR_DISABLED or TPM_ERR_DEACTIVATED, -EFAULT when tpm_transmit()
+ * returns fewer than TPM_HEADER_SIZE bytes, and the last poll result
+ * (TPM_WARN_DOING_SELFTEST or -ETIME) when the loop budget runs out.
+ */
+int tpm_do_selftest(struct tpm_chip *chip)
+{
+       int rc;
+       unsigned int loops;
+       unsigned int delay_msec = 100; /* pause between two polls */
+       unsigned long duration;
+       struct tpm_cmd_t cmd;
+
+       duration = tpm_calc_ordinal_duration(chip, TPM_ORD_CONTINUE_SELFTEST);
+
+       /* Number of polls that fit into the command's expected duration.
+        * NOTE(review): loops is unsigned; if this division yields 0 the
+        * "--loops > 0" test below wraps around instead of ending the loop
+        * after one pass -- confirm the duration can never be < delay_msec.
+        */
+       loops = jiffies_to_msecs(duration) / delay_msec;
+
+       rc = tpm_continue_selftest(chip);
+       /* This may fail if there was no TPM driver during a suspend/resume
+        * cycle; some may return 10 (BAD_ORDINAL), others 28 (FAILEDSELFTEST)
+        */
+       if (rc)
+               return rc;
+
+       do {
+               /* Attempt to read a PCR value */
+               cmd.header.in = pcrread_header;
+               cmd.params.pcrread_in.pcr_idx = cpu_to_be32(0);
+               rc = tpm_transmit(chip, (u8 *) &cmd, READ_PCR_RESULT_SIZE);
+               /* Some buggy TPMs will not respond to tpm_tis_ready() for
+                * around 300ms while the self test is ongoing, keep trying
+                * until the self test duration expires. */
+               if (rc == -ETIME) {
+                       dev_info(chip->dev, HW_ERR "TPM command timed out during continue self test");
+                       msleep(delay_msec);
+                       continue; /* jumps to the --loops test below */
+               }
+
+               /* anything shorter than a response header is unusable */
+               if (rc < TPM_HEADER_SIZE)
+                       return -EFAULT;
+
+               rc = be32_to_cpu(cmd.header.out.return_code);
+               if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) {
+                       dev_info(chip->dev,
+                                "TPM is disabled/deactivated (0x%X)\n", rc);
+                       /* TPM is disabled and/or deactivated; driver can
+                        * proceed and TPM does handle commands for
+                        * suspend/resume correctly
+                        */
+                       return 0;
+               }
+               /* any answer other than "still self-testing" is final */
+               if (rc != TPM_WARN_DOING_SELFTEST)
+                       return rc;
+               msleep(delay_msec);
+       } while (--loops > 0);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_do_selftest);
+
+/*
+ * Send the caller-built command in @cmd (at most @buflen bytes) to the chip
+ * selected by @chip_num via transmit_cmd().  The chip is pinned with
+ * tpm_chip_find_get() for the duration of the call.
+ *
+ * Returns -ENODEV when no matching chip is found, otherwise the result of
+ * transmit_cmd().
+ */
+int tpm_send(u32 chip_num, void *cmd, size_t buflen)
+{
+       struct tpm_chip *chip = tpm_chip_find_get(chip_num);
+       int rc;
+
+       if (!chip)
+               return -ENODEV;
+
+       rc = transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd");
+       tpm_chip_put(chip);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_send);
+
+/*
+ * Attribute show callback: queries the number of PCRs and prints one line
+ * per PCR in the form "PCR-NN: XX XX ... XX \n" (TPM_DIGEST_SIZE hex bytes).
+ * Output stops at the first PCR that cannot be read; returns the number of
+ * characters written, or 0 if the PCR count cannot be determined.
+ */
+ssize_t tpm_show_pcrs(struct device *dev, struct device_attribute *attr,
+                     char *buf)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+       u8 digest[TPM_DIGEST_SIZE];
+       char *cursor = buf;
+       int pcr, byte, num_pcrs;
+       cap_t cap;
+
+       if (tpm_getcap(dev, TPM_CAP_PROP_PCR, &cap,
+                      "attempting to determine the number of PCRS"))
+               return 0;
+
+       num_pcrs = be32_to_cpu(cap.num_pcrs);
+       for (pcr = 0; pcr < num_pcrs; pcr++) {
+               if (__tpm_pcr_read(chip, pcr, digest))
+                       break;
+
+               cursor += sprintf(cursor, "PCR-%02d: ", pcr);
+               for (byte = 0; byte < TPM_DIGEST_SIZE; byte++)
+                       cursor += sprintf(cursor, "%02X ", digest[byte]);
+               cursor += sprintf(cursor, "\n");
+       }
+
+       return cursor - buf;
+}
+EXPORT_SYMBOL_GPL(tpm_show_pcrs);
+
+#define  READ_PUBEK_RESULT_SIZE 314
+#define TPM_ORD_READPUBEK cpu_to_be32(124)
+/* Request header sent by tpm_show_pubek(). */
+static struct tpm_input_header tpm_readpubek_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(30),
+       .ordinal = TPM_ORD_READPUBEK
+};
+
+/*
+ * Attribute show callback: reads the public endorsement key and prints its
+ * algorithm, schemes, parameters, modulus length and the 256 modulus bytes
+ * (16 per line) as hex.  Returns the number of characters written, which is
+ * 0 when the command fails.
+ */
+ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr,
+                      char *buf)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+       struct tpm_cmd_t tpm_cmd;
+       const u8 *modulus;
+       char *cursor = buf;
+       u8 *data;
+       int i;
+
+       tpm_cmd.header.in = tpm_readpubek_header;
+       if (transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE,
+                        "attempting to read the PUBEK"))
+               return 0;
+
+       /*
+          ignore header 10 bytes
+          algorithm 32 bits (1 == RSA )
+          encscheme 16 bits
+          sigscheme 16 bits
+          parameters (RSA 12->bytes: keybit, #primes, expbit)
+          keylenbytes 32 bits
+          256 byte modulus
+          ignore checksum 20 bytes
+        */
+       data = tpm_cmd.params.readpubek_out_buffer;
+       cursor += sprintf(cursor,
+                         "Algorithm: %02X %02X %02X %02X\n"
+                         "Encscheme: %02X %02X\n"
+                         "Sigscheme: %02X %02X\n"
+                         "Parameters: %02X %02X %02X %02X "
+                         "%02X %02X %02X %02X "
+                         "%02X %02X %02X %02X\n"
+                         "Modulus length: %d\n"
+                         "Modulus:\n",
+                         data[0], data[1], data[2], data[3],
+                         data[4], data[5],
+                         data[6], data[7],
+                         data[12], data[13], data[14], data[15],
+                         data[16], data[17], data[18], data[19],
+                         data[20], data[21], data[22], data[23],
+                         be32_to_cpu(*((__be32 *) (data + 24))));
+
+       /* the modulus starts right after the 32-bit length at offset 24 */
+       modulus = data + 28;
+       for (i = 0; i < 256; i++) {
+               cursor += sprintf(cursor, "%02X ", modulus[i]);
+               if ((i + 1) % 16 == 0)
+                       cursor += sprintf(cursor, "\n");
+       }
+
+       return cursor - buf;
+}
+EXPORT_SYMBOL_GPL(tpm_show_pubek);
+
+
+/*
+ * Attribute show callback: prints the manufacturer id followed by the TCG
+ * and firmware versions.  The 1.2 version capability is tried first and the
+ * 1.1 one is used as fallback.  Returns the number of characters written,
+ * or 0 when the manufacturer or both version queries fail.
+ */
+ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr,
+                     char *buf)
+{
+       cap_t cap;
+       char *cursor = buf;
+
+       if (tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap,
+                      "attempting to determine the manufacturer"))
+               return 0;
+       cursor += sprintf(cursor, "Manufacturer: 0x%x\n",
+                         be32_to_cpu(cap.manufacturer_id));
+
+       /* Try to get a TPM version 1.2 TPM_CAP_VERSION_INFO */
+       if (tpm_getcap(dev, CAP_VERSION_1_2, &cap,
+                      "attempting to determine the 1.2 version") == 0) {
+               cursor += sprintf(cursor,
+                                 "TCG version: %d.%d\nFirmware version: %d.%d\n",
+                                 cap.tpm_version_1_2.Major,
+                                 cap.tpm_version_1_2.Minor,
+                                 cap.tpm_version_1_2.revMajor,
+                                 cap.tpm_version_1_2.revMinor);
+               return cursor - buf;
+       }
+
+       /* Otherwise just use TPM_STRUCT_VER */
+       if (tpm_getcap(dev, CAP_VERSION_1_1, &cap,
+                      "attempting to determine the 1.1 version"))
+               return 0;
+       cursor += sprintf(cursor,
+                         "TCG version: %d.%d\nFirmware version: %d.%d\n",
+                         cap.tpm_version.Major,
+                         cap.tpm_version.Minor,
+                         cap.tpm_version.revMajor,
+                         cap.tpm_version.revMinor);
+
+       return cursor - buf;
+}
+EXPORT_SYMBOL_GPL(tpm_show_caps);
+
+/*
+ * Attribute show callback: prints the short, medium and long command
+ * durations stored in chip->vendor.duration[], converted from jiffies to
+ * microseconds, followed by "[adjusted]" or "[original]" depending on
+ * chip->vendor.duration_adjusted.  Prints nothing while the long duration
+ * is still 0.
+ */
+ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+       const char *origin;
+
+       if (!chip->vendor.duration[TPM_LONG])
+               return 0;
+
+       origin = chip->vendor.duration_adjusted ? "adjusted" : "original";
+
+       return sprintf(buf, "%d %d %d [%s]\n",
+                      jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
+                      jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
+                      jiffies_to_usecs(chip->vendor.duration[TPM_LONG]),
+                      origin);
+}
+EXPORT_SYMBOL_GPL(tpm_show_durations);
+
+/*
+ * Attribute show callback: prints the four timeouts (a, b, c, d) stored in
+ * chip->vendor, converted from jiffies to microseconds, followed by
+ * "[adjusted]" or "[original]" depending on chip->vendor.timeout_adjusted.
+ */
+ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+       const char *origin;
+
+       origin = chip->vendor.timeout_adjusted ? "adjusted" : "original";
+
+       return sprintf(buf, "%d %d %d %d [%s]\n",
+                      jiffies_to_usecs(chip->vendor.timeout_a),
+                      jiffies_to_usecs(chip->vendor.timeout_b),
+                      jiffies_to_usecs(chip->vendor.timeout_c),
+                      jiffies_to_usecs(chip->vendor.timeout_d),
+                      origin);
+}
+EXPORT_SYMBOL_GPL(tpm_show_timeouts);
+
+/*
+ * Attribute store callback: any write invokes the vendor cancel hook of the
+ * chip bound to @dev.  The written data is not inspected.  Returns @count,
+ * or 0 when no chip is attached to @dev.
+ */
+ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
+                       const char *buf, size_t count)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip) {
+               chip->vendor.cancel(chip);
+               return count;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tpm_store_cancel);
+
+/*
+ * Wake-up condition used by wait_for_tpm_stat(): true once all bits of @mask
+ * are set in the chip status, or -- only when @check_cancel is set -- once
+ * the vendor req_canceled() hook reports a cancellation for that status.
+ * *@canceled tells the caller which of the two happened.
+ */
+static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask,
+                                       bool check_cancel, bool *canceled)
+{
+       u8 status = chip->vendor.status(chip);
+
+       *canceled = false;
+
+       if ((status & mask) == mask)
+               return true;
+       if (!check_cancel)
+               return false;
+
+       *canceled = chip->vendor.req_canceled(chip, status);
+       return *canceled;
+}
+
+int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
+                     wait_queue_head_t *queue, bool check_cancel)
+{
+       unsigned long stop;
+       long rc;
+       u8 status;
+       bool canceled = false;
+
+       /* check current status
+        *
+        * Purpose of this function: wait until every bit of @mask is set in
+        * the value returned by chip->vendor.status(), for at most @timeout
+        * jiffies.  Returns 0 once the bits are set, -ECANCELED when the
+        * interrupt-driven wait ends because wait_for_tpm_stat_cond()
+        * reported a cancellation (only possible with @check_cancel), and
+        * -ETIME in every other case.  If the bits are already set no
+        * waiting happens at all.
+        */
+       status = chip->vendor.status(chip);
+       if ((status & mask) == mask)
+               return 0;
+
+       stop = jiffies + timeout; /* absolute deadline */
+
+       if (chip->vendor.irq) {
+               /* interrupt mode: sleep on @queue until the condition holds */
+again:
+               timeout = stop - jiffies; /* time left, recomputed per retry */
+               if ((long)timeout <= 0)
+                       return -ETIME;
+               rc = wait_event_interruptible_timeout(*queue,
+                       wait_for_tpm_stat_cond(chip, mask, check_cancel,
+                                              &canceled),
+                       timeout);
+               if (rc > 0) {
+                       if (canceled)
+                               return -ECANCELED;
+                       return 0;
+               }
+               /* a wait interrupted while the task is being frozen is
+                * retried with the remaining time; any other rc <= 0 falls
+                * through to -ETIME below
+                */
+               if (rc == -ERESTARTSYS && freezing(current)) {
+                       clear_thread_flag(TIF_SIGPENDING);
+                       goto again;
+               }
+       } else {
+               /* polling mode: @queue and @check_cancel are not used here */
+               do {
+                       msleep(TPM_TIMEOUT);
+                       status = chip->vendor.status(chip);
+                       if ((status & mask) == mask)
+                               return 0;
+               } while (time_before(jiffies, stop));
+       }
+       return -ETIME;
+}
+EXPORT_SYMBOL_GPL(wait_for_tpm_stat);
+/*
+ * Device file system interface to the TPM
+ *
+ * A chip can be held open by only one opener at a time: bit 0 of
+ * chip->is_open is claimed atomically with test_and_set_bit() and a second
+ * open fails with -EBUSY until tpm_release() clears the bit again.
+ *
+ * On success a zeroed TPM_BUFSIZE data buffer is allocated, the pending
+ * byte count is reset, file->private_data is switched from the miscdevice
+ * to the chip and a reference on chip->dev is taken.  Returns -ENOMEM when
+ * the buffer cannot be allocated.
+ */
+int tpm_open(struct inode *inode, struct file *file)
+{
+       struct miscdevice *misc = file->private_data;
+       struct tpm_chip *chip;
+
+       chip = container_of(misc, struct tpm_chip, vendor.miscdev);
+       if (test_and_set_bit(0, &chip->is_open)) {
+               dev_dbg(chip->dev, "Another process owns this TPM\n");
+               return -EBUSY;
+       }
+
+       chip->data_buffer = kzalloc(TPM_BUFSIZE, GFP_KERNEL);
+       if (!chip->data_buffer) {
+               /* give the chip back so a later open can succeed */
+               clear_bit(0, &chip->is_open);
+               return -ENOMEM;
+       }
+
+       atomic_set(&chip->data_pending, 0);
+       get_device(chip->dev);
+       file->private_data = chip;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tpm_open);
+
+/*
+ * Called on file close
+ *
+ * Undoes tpm_open(): stops the user_read_timer and waits for chip->work,
+ * detaches the chip from the file, discards any pending result, frees the
+ * data buffer with kzfree(), clears bit 0 of chip->is_open so the chip can
+ * be opened again, and drops the device reference taken in tpm_open().
+ * Always returns 0.
+ */
+int tpm_release(struct inode *inode, struct file *file)
+{
+       struct tpm_chip *chip = file->private_data;
+
+       del_singleshot_timer_sync(&chip->user_read_timer);
+       flush_work(&chip->work);
+       file->private_data = NULL;
+       atomic_set(&chip->data_pending, 0);
+       kzfree(chip->data_buffer);
+       clear_bit(0, &chip->is_open);
+       put_device(chip->dev);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(tpm_release);
+
+/*
+ * write() handler: copies a command of @size bytes from user space into the
+ * chip's data buffer, transmits it and leaves the response in the same
+ * buffer for a following tpm_read().
+ *
+ * Returns @size on success, -EBUSY while an earlier response is still
+ * pending, -E2BIG when @size exceeds TPM_BUFSIZE, -EFAULT when the user
+ * buffer cannot be copied, or the negative result of tpm_transmit().
+ */
+ssize_t tpm_write(struct file *file, const char __user *buf,
+                 size_t size, loff_t *off)
+{
+       struct tpm_chip *chip = file->private_data;
+       ssize_t result;
+
+       /* cannot perform a write until the read has cleared
+          either via tpm_read or a user_read_timer timeout.
+          This also prevents splitted buffered writes from blocking here.
+       */
+       if (atomic_read(&chip->data_pending) != 0)
+               return -EBUSY;
+
+       if (size > TPM_BUFSIZE)
+               return -E2BIG;
+
+       mutex_lock(&chip->buffer_mutex);
+
+       if (copy_from_user(chip->data_buffer, (void __user *) buf, size)) {
+               result = -EFAULT;
+               goto unlock;
+       }
+
+       /* atomic tpm command send and result receive */
+       result = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
+       if (result >= 0)
+               atomic_set(&chip->data_pending, result);
+
+unlock:
+       mutex_unlock(&chip->buffer_mutex);
+       if (result < 0)
+               return result;
+
+       /* Set a timeout by which the reader must come claim the result */
+       mod_timer(&chip->user_read_timer, jiffies + (60 * HZ));
+
+       return size;
+}
+EXPORT_SYMBOL_GPL(tpm_write);
+
+/*
+ * read() handler: hands the response left behind by tpm_write() to user
+ * space.  At most @size bytes are copied; the whole pending response is
+ * wiped from the data buffer afterwards and the pending count is reset, so
+ * a response can be read only once.
+ *
+ * Returns the number of bytes copied, the pending count unchanged when it
+ * is not positive, or -EFAULT when the copy to user space fails.
+ */
+ssize_t tpm_read(struct file *file, char __user *buf,
+                size_t size, loff_t *off)
+{
+       struct tpm_chip *chip = file->private_data;
+       ssize_t pending, ret_size;
+       int uncopied;
+
+       del_singleshot_timer_sync(&chip->user_read_timer);
+       flush_work(&chip->work);
+
+       pending = atomic_read(&chip->data_pending);
+       ret_size = pending;
+       if (pending > 0) {      /* relay data */
+               if (size < pending)
+                       ret_size = size;
+
+               mutex_lock(&chip->buffer_mutex);
+               uncopied = copy_to_user(buf, chip->data_buffer, ret_size);
+               /* wipe everything that was pending, not just what was read */
+               memset(chip->data_buffer, 0, pending);
+               if (uncopied)
+                       ret_size = -EFAULT;
+               mutex_unlock(&chip->buffer_mutex);
+       }
+
+       atomic_set(&chip->data_pending, 0);
+
+       return ret_size;
+}
+EXPORT_SYMBOL_GPL(tpm_read);
+
+/*
+ * Tear down the chip attached to @dev: unlink it from tpm_chip_list, wait
+ * for RCU readers, then remove the misc device, the sysfs group, the PPI
+ * entries and the BIOS log, and finally drop a reference on the device.
+ * Logs an error and does nothing when @dev carries no chip.
+ */
+void tpm_remove_hardware(struct device *dev)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (!chip) {
+               dev_err(dev, "No device data found\n");
+               return;
+       }
+
+       /* make the chip unreachable for tpm_chip_find_get() first */
+       spin_lock(&driver_lock);
+       list_del_rcu(&chip->list);
+       spin_unlock(&driver_lock);
+       synchronize_rcu();
+
+       /* then remove the user-visible interfaces */
+       misc_deregister(&chip->vendor.miscdev);
+       sysfs_remove_group(&dev->kobj, chip->vendor.attr_group);
+       tpm_remove_ppi(&dev->kobj);
+       tpm_bios_log_teardown(chip->bios_dir);
+
+       /* write it this way to be explicit (chip->dev == dev) */
+       put_device(chip->dev);
+}
+EXPORT_SYMBOL_GPL(tpm_remove_hardware);
+
+#define TPM_ORD_SAVESTATE cpu_to_be32(152)
+#define SAVESTATE_RESULT_SIZE 10
+
+static struct tpm_input_header savestate_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(10),
+       .ordinal = TPM_ORD_SAVESTATE
+};
+
+/*
+ * We are about to suspend. Save the TPM state
+ * so that it can be restored.
+ *
+ * If tpm_suspend_pcr is non-zero, that PCR is first extended with an
+ * all-zero dummy hash.  The savestate command is then sent up to TPM_RETRY
+ * times, sleeping TPM_TIMEOUT_RETRY ms after each TPM_WARN_RETRY answer.
+ *
+ * Returns -ENODEV when @dev has no chip attached, otherwise the result of
+ * the last savestate attempt (0 on success).
+ */
+int tpm_pm_suspend(struct device *dev)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+       struct tpm_cmd_t cmd;
+       int rc, try;
+
+       u8 dummy_hash[TPM_DIGEST_SIZE] = { 0 };
+
+       if (chip == NULL)
+               return -ENODEV;
+
+       /* for buggy tpm, flush pcrs with extend to selected dummy
+        * NOTE(review): the result of this extend is stored in rc but
+        * overwritten by the savestate loop below without being checked.
+        */
+       if (tpm_suspend_pcr) {
+               cmd.header.in = pcrextend_header;
+               cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(tpm_suspend_pcr);
+               memcpy(cmd.params.pcrextend_in.hash, dummy_hash,
+                      TPM_DIGEST_SIZE);
+               rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
+                                 "extending dummy pcr before suspend");
+       }
+
+       /* now do the actual savestate */
+       for (try = 0; try < TPM_RETRY; try++) {
+               cmd.header.in = savestate_header;
+               rc = transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL);
+
+               /*
+                * If the TPM indicates that it is too busy to respond to
+                * this command then retry before giving up.  It can take
+                * several seconds for this TPM to be ready.
+                *
+                * This can happen if the TPM has already been sent the
+                * SaveState command before the driver has loaded.  TCG 1.2
+                * specification states that any communication after SaveState
+                * may cause the TPM to invalidate previously saved state.
+                */
+               if (rc != TPM_WARN_RETRY)
+                       break;
+               msleep(TPM_TIMEOUT_RETRY);
+       }
+
+       /* try counts the retries that were needed before the final answer */
+       if (rc)
+               dev_err(chip->dev,
+                       "Error (%d) sending savestate before suspend\n", rc);
+       else if (try > 0)
+               dev_warn(chip->dev, "TPM savestate took %dms\n",
+                        try * TPM_TIMEOUT_RETRY);
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(tpm_pm_suspend);
+
+/*
+ * Resume from a power save. The BIOS already restored
+ * the TPM state, so nothing is sent to the chip here: the function only
+ * reports -ENODEV when @dev has no chip attached and 0 otherwise.
+ */
+int tpm_pm_resume(struct device *dev)
+{
+       return dev_get_drvdata(dev) ? 0 : -ENODEV;
+}
+EXPORT_SYMBOL_GPL(tpm_pm_resume);
+
+#define TPM_GETRANDOM_RESULT_SIZE      18
+static struct tpm_input_header tpm_getrandom_header = {
+       .tag = TPM_TAG_RQU_COMMAND,
+       .length = cpu_to_be32(14),
+       .ordinal = TPM_ORD_GET_RANDOM
+};
+
+/**
+ * tpm_get_random() - Get random bytes from the tpm's RNG
+ * @chip_num: A specific chip number for the request or TPM_ANY_NUM
+ * @out: destination buffer for the random bytes
+ * @max: the max number of bytes to write to @out
+ *
+ * The request is repeated (one initial attempt plus up to five retries)
+ * until @max bytes were collected or a command fails; a short result is
+ * therefore possible.
+ *
+ * Returns < 0 on error and the number of bytes read on success:
+ * -EINVAL for a NULL @out, a zero @max or @max > TPM_MAX_RNG_DATA,
+ * -ENODEV when no matching chip is found, -EIO when no byte could be read.
+ */
+int tpm_get_random(u32 chip_num, u8 *out, size_t max)
+{
+       struct tpm_chip *chip;
+       struct tpm_cmd_t tpm_cmd;
+       u32 recd, num_bytes = min_t(u32, max, TPM_MAX_RNG_DATA);
+       int err, total = 0, retries = 5;
+       u8 *dest = out;
+
+       /*
+        * Validate the arguments before looking up the chip: the lookup
+        * takes a module reference that every later exit has to drop.
+        */
+       if (!out || !num_bytes || max > TPM_MAX_RNG_DATA)
+               return -EINVAL;
+
+       chip = tpm_chip_find_get(chip_num);
+       if (chip == NULL)
+               return -ENODEV;
+
+       do {
+               tpm_cmd.header.in = tpm_getrandom_header;
+               tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes);
+
+               err = transmit_cmd(chip, &tpm_cmd,
+                                  TPM_GETRANDOM_RESULT_SIZE + num_bytes,
+                                  "attempting get random");
+               if (err)
+                       break;
+
+               recd = be32_to_cpu(tpm_cmd.params.getrandom_out.rng_data_len);
+               /*
+                * The length comes from the device; never copy more than
+                * was asked for, or @out and the command buffer overrun.
+                */
+               if (recd > num_bytes)
+                       break;
+               memcpy(dest, tpm_cmd.params.getrandom_out.rng_data, recd);
+
+               dest += recd;
+               total += recd;
+               num_bytes -= recd;
+       } while (retries-- && total < max);
+
+       /* drop the reference taken by tpm_chip_find_get() */
+       tpm_chip_put(chip);
+       return total ? total : -EIO;
+}
+EXPORT_SYMBOL_GPL(tpm_get_random);
+
+/*
+ * Vendor part of the release path: call the vendor supplied release hook,
+ * if there is one, and give the chip's device number back to dev_mask.
+ * A NULL @chip is ignored.
+ */
+void tpm_dev_vendor_release(struct tpm_chip *chip)
+{
+       if (chip) {
+               if (chip->vendor.release)
+                       chip->vendor.release(chip->dev);
+
+               clear_bit(chip->dev_num, dev_mask);
+       }
+}
+EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
+
+
+/*
+ * Device release callback installed by tpm_register_hardware().
+ *
+ * Once all references to platform device are down to 0,
+ * release all allocated structures: run the vendor release step, chain to
+ * the release callback saved in chip->release, and free the chip itself.
+ * Does nothing when @dev carries no chip.
+ */
+void tpm_dev_release(struct device *dev)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip) {
+               tpm_dev_vendor_release(chip);
+
+               chip->release(dev);
+               kfree(chip);
+       }
+}
+EXPORT_SYMBOL_GPL(tpm_dev_release);
+
+/*
+ * Called from tpm_<specific>.c probe function only for devices
+ * the driver has determined it should claim.  Prior to calling
+ * this function the specific probe function has called pci_enable_device
+ * upon errant exit from this function specific probe function should call
+ * pci_disable_device
+ *
+ * Allocates the chip, copies the vendor description from @entry, claims a
+ * device number, hooks tpm_dev_release() into @dev and registers the misc
+ * device, the sysfs group, the PPI entries and the BIOS log before adding
+ * the chip to tpm_chip_list.
+ *
+ * Returns the new chip, or NULL on failure.  On failure everything done so
+ * far is undone, so @dev is left exactly as it was passed in.
+ */
+struct tpm_chip *tpm_register_hardware(struct device *dev,
+                                       const struct tpm_vendor_specific *entry)
+{
+       struct tpm_chip *chip;
+
+       /* Driver specific per-device data */
+       chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+
+       if (chip == NULL)
+               return NULL;
+
+       mutex_init(&chip->buffer_mutex);
+       mutex_init(&chip->tpm_mutex);
+       INIT_LIST_HEAD(&chip->list);
+
+       INIT_WORK(&chip->work, timeout_work);
+
+       setup_timer(&chip->user_read_timer, user_reader_timeout,
+                       (unsigned long)chip);
+
+       memcpy(&chip->vendor, entry, sizeof(struct tpm_vendor_specific));
+
+       chip->dev_num = find_first_zero_bit(dev_mask, TPM_NUM_DEVICES);
+
+       if (chip->dev_num >= TPM_NUM_DEVICES) {
+               dev_err(dev, "No available tpm device numbers\n");
+               goto out_free;
+       } else if (chip->dev_num == 0)
+               chip->vendor.miscdev.minor = TPM_MINOR;
+       else
+               chip->vendor.miscdev.minor = MISC_DYNAMIC_MINOR;
+
+       set_bit(chip->dev_num, dev_mask);
+
+       scnprintf(chip->devname, sizeof(chip->devname), "%s%d", "tpm",
+                 chip->dev_num);
+       chip->vendor.miscdev.name = chip->devname;
+
+       chip->vendor.miscdev.parent = dev;
+       chip->dev = get_device(dev);
+       /* chain our release in front of the device's own one */
+       chip->release = dev->release;
+       dev->release = tpm_dev_release;
+       dev_set_drvdata(dev, chip);
+
+       if (misc_register(&chip->vendor.miscdev)) {
+               dev_err(chip->dev,
+                       "unable to misc_register %s, minor %d\n",
+                       chip->vendor.miscdev.name,
+                       chip->vendor.miscdev.minor);
+               goto put_device;
+       }
+
+       if (sysfs_create_group(&dev->kobj, chip->vendor.attr_group))
+               goto del_misc;
+
+       if (tpm_add_ppi(&dev->kobj))
+               goto del_sysfs;
+
+       chip->bios_dir = tpm_bios_log_setup(chip->devname);
+
+       /* Make chip available */
+       spin_lock(&driver_lock);
+       list_add_rcu(&chip->list, &tpm_chip_list);
+       spin_unlock(&driver_lock);
+
+       return chip;
+
+del_sysfs:
+       sysfs_remove_group(&dev->kobj, chip->vendor.attr_group);
+del_misc:
+       misc_deregister(&chip->vendor.miscdev);
+put_device:
+       /*
+        * The chip is freed below, so nothing may keep pointing at it:
+        * give the device its own release callback back, detach the chip
+        * from the device and return the device number to the pool.  This
+        * must happen before put_device() so that a final release never
+        * runs tpm_dev_release() on the chip freed here.
+        */
+       dev->release = chip->release;
+       dev_set_drvdata(dev, NULL);
+       clear_bit(chip->dev_num, dev_mask);
+       put_device(chip->dev);
+out_free:
+       kfree(chip);
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(tpm_register_hardware);
+
+MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
+MODULE_DESCRIPTION("TPM Driver");
+MODULE_VERSION("2.0");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
deleted file mode 100644 (file)
index e3c974a..0000000
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@watson.ibm.com>
- * Dave Safford <safford@watson.ibm.com>
- * Reiner Sailer <sailer@watson.ibm.com>
- * Kylene Hall <kjhall@us.ibm.com>
- *
- * Maintained by: <tpmdd-devel@lists.sourceforge.net>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org      
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- * 
- * Note, the TPM chip is not interrupt driven (only polling)
- * and can have very long timeouts (minutes!). Hence the unusual
- * calls to msleep.
- *
- */
-
-#include <linux/poll.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/freezer.h>
-
-#include "tpm.h"
-#include "tpm_eventlog.h"
-
-/*
- * Duration classes for TPM commands.  TPM_SHORT, TPM_MEDIUM and TPM_LONG
- * are used as indices into chip->vendor.duration[]; TPM_UNDEFINED marks
- * an ordinal without a class, for which tpm_calc_ordinal_duration()
- * returns its default wait time.
- */
-enum tpm_duration {
-       TPM_SHORT = 0,
-       TPM_MEDIUM = 1,
-       TPM_LONG = 2,
-       TPM_UNDEFINED,
-};
-
-#define TPM_MAX_ORDINAL 243    /* number of entries in tpm_ordinal_duration[] */
-#define TSC_MAX_ORDINAL 12
-/* Command categories, carried in the top byte (bits 31..24) of an ordinal */
-#define TPM_PROTECTED_COMMAND 0x00
-#define TPM_CONNECTION_COMMAND 0x40
-
-/*
- * Bug workaround - some TPM's don't flush the most
- * recently changed pcr on suspend, so force the flush
- * with an extend to the selected _unused_ non-volatile pcr.
- *
- * The variable is unsigned so that its type matches the "uint"
- * parameter type it is registered with below.
- */
-static unsigned int tpm_suspend_pcr;
-module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644);
-MODULE_PARM_DESC(suspend_pcr,
-                "PCR to use for dummy writes to facilitate flush on suspend.");
-
-/* List of chips; walked under RCU by tpm_chip_find_get(). */
-static LIST_HEAD(tpm_chip_list);
-/* NOTE(review): presumably serializes tpm_chip_list updates; the writers are outside this part of the file - confirm. */
-static DEFINE_SPINLOCK(driver_lock);
-/* Bitmap with TPM_NUM_DEVICES bits, one per device number. */
-static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES);
-
-/*
- * Array with one entry per ordinal defining the maximum amount
- * of time the chip could take to return the result.  The ordinal
- * designation of short, medium or long is defined in a table in
- * TCG Specification TPM Main Part 2 TPM Structures Section 17. The
- * values of the SHORT, MEDIUM, and LONG durations are retrieved
- * from the chip during initialization with a call to tpm_get_timeouts.
- *
- * The array is indexed directly by ordinal number; the trailing
- * comments mark every fifth index.  An entry of TPM_UNDEFINED means the
- * ordinal has no duration class, in which case
- * tpm_calc_ordinal_duration() returns its default wait time.
- */
-static const u8 tpm_ordinal_duration[TPM_MAX_ORDINAL] = {
-       TPM_UNDEFINED,          /* 0 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 5 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 10 */
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_LONG,
-       TPM_LONG,
-       TPM_MEDIUM,             /* 15 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_LONG,
-       TPM_SHORT,              /* 20 */
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_SHORT,              /* 25 */
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_MEDIUM,             /* 30 */
-       TPM_LONG,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,              /* 35 */
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_MEDIUM,             /* 40 */
-       TPM_LONG,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,              /* 45 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_LONG,
-       TPM_MEDIUM,             /* 50 */
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 55 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_MEDIUM,             /* 60 */
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_MEDIUM,             /* 65 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 70 */
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 75 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_LONG,               /* 80 */
-       TPM_UNDEFINED,
-       TPM_MEDIUM,
-       TPM_LONG,
-       TPM_SHORT,
-       TPM_UNDEFINED,          /* 85 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 90 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,          /* 95 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_MEDIUM,             /* 100 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 105 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 110 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,              /* 115 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_LONG,               /* 120 */
-       TPM_LONG,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_SHORT,
-       TPM_SHORT,              /* 125 */
-       TPM_SHORT,
-       TPM_LONG,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,              /* 130 */
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,          /* 135 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 140 */
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 145 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 150 */
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,          /* 155 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 160 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 165 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_LONG,               /* 170 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 175 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_MEDIUM,             /* 180 */
-       TPM_SHORT,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_MEDIUM,             /* 185 */
-       TPM_SHORT,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 190 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 195 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 200 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,
-       TPM_SHORT,              /* 205 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_MEDIUM,             /* 210 */
-       TPM_UNDEFINED,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,          /* 215 */
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,
-       TPM_SHORT,              /* 220 */
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_SHORT,
-       TPM_UNDEFINED,          /* 225 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 230 */
-       TPM_LONG,
-       TPM_MEDIUM,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,          /* 235 */
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_UNDEFINED,
-       TPM_SHORT,              /* 240 */
-       TPM_UNDEFINED,
-       TPM_MEDIUM,
-};
-
-/*
- * Callback taking the tpm_chip pointer cast to unsigned long; it only
- * queues the chip's work item with schedule_work().
- */
-static void user_reader_timeout(unsigned long ptr)
-{
-       struct tpm_chip *tpm = (struct tpm_chip *)ptr;
-
-       schedule_work(&tpm->work);
-}
-
-/*
- * Work handler: clears the data_pending flag and zeroes the
- * TPM_BUFSIZE-byte data buffer of the owning chip, holding buffer_mutex
- * for both steps.
- */
-static void timeout_work(struct work_struct *work)
-{
-       struct tpm_chip *tpm;
-
-       tpm = container_of(work, struct tpm_chip, work);
-
-       mutex_lock(&tpm->buffer_mutex);
-       atomic_set(&tpm->data_pending, 0);
-       memset(tpm->data_buffer, 0, TPM_BUFSIZE);
-       mutex_unlock(&tpm->buffer_mutex);
-}
-
-/*
- * Returns max number of jiffies to wait for the command @ordinal.
- *
- * tpm_ordinal_duration[] has TPM_MAX_ORDINAL entries, so only ordinals
- * below that bound are looked up.  Any such ordinal has a zero top byte,
- * which makes a separate command-category test redundant; a test for
- * category 0x40 combined with "ordinal < 12" can never be true, because
- * an ordinal with a non-zero top byte is at least 1 << 24.
- *
- * If the ordinal has no duration class, or the chip's value for that
- * class is not positive, two minutes (2 * 60 * HZ) is returned.
- */
-unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip,
-                                          u32 ordinal)
-{
-       int duration_idx = TPM_UNDEFINED;
-       int duration = 0;
-
-       if (ordinal < TPM_MAX_ORDINAL)
-               duration_idx = tpm_ordinal_duration[ordinal];
-
-       if (duration_idx != TPM_UNDEFINED)
-               duration = chip->vendor.duration[duration_idx];
-       if (duration <= 0)
-               return 2 * 60 * HZ;
-
-       return duration;
-}
-EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration);
-
-/*
- * Internal kernel interface to transmit TPM commands
- *
- * @buf holds the command on entry and receives the response; @bufsiz is
- * the size of that buffer and is capped at TPM_BUFSIZE.  The command
- * length and the ordinal are read big-endian from offsets 2 and 6 of
- * @buf, so a buffer too small to hold a header is rejected before those
- * reads are made.
- *
- * Returns the result of the vendor recv hook, or a negative errno:
- * -EINVAL (buffer shorter than TPM_HEADER_SIZE), -ENODATA (length field
- * is zero), -E2BIG (length field exceeds the buffer), -ECANCELED,
- * -ETIME, or the error reported by the vendor send hook.
- */
-static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
-                           size_t bufsiz)
-{
-       ssize_t rc;
-       u32 count, ordinal;
-       unsigned long stop;
-
-       /* The length and ordinal reads below need a complete header */
-       if (bufsiz < TPM_HEADER_SIZE)
-               return -EINVAL;
-
-       if (bufsiz > TPM_BUFSIZE)
-               bufsiz = TPM_BUFSIZE;
-
-       count = be32_to_cpu(*((__be32 *) (buf + 2)));
-       ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
-       if (count == 0)
-               return -ENODATA;
-       if (count > bufsiz) {
-               dev_err(chip->dev,
-                       "invalid count value %x %zx \n", count, bufsiz);
-               return -E2BIG;
-       }
-
-       /* One command at a time: held from send until the reply is read */
-       mutex_lock(&chip->tpm_mutex);
-
-       rc = chip->vendor.send(chip, (u8 *) buf, count);
-       if (rc < 0) {
-               dev_err(chip->dev,
-                       "tpm_transmit: tpm_send: error %zd\n", rc);
-               goto out;
-       }
-
-       /* With an irq configured the status polling loop is skipped */
-       if (chip->vendor.irq)
-               goto out_recv;
-
-       /* Poll the status until completion, cancellation or timeout */
-       stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal);
-       do {
-               u8 status = chip->vendor.status(chip);
-               if ((status & chip->vendor.req_complete_mask) ==
-                   chip->vendor.req_complete_val)
-                       goto out_recv;
-
-               if (chip->vendor.req_canceled(chip, status)) {
-                       dev_err(chip->dev, "Operation Canceled\n");
-                       rc = -ECANCELED;
-                       goto out;
-               }
-
-               msleep(TPM_TIMEOUT);    /* CHECK */
-               rmb();
-       } while (time_before(jiffies, stop));
-
-       chip->vendor.cancel(chip);
-       dev_err(chip->dev, "Operation Timed out\n");
-       rc = -ETIME;
-       goto out;
-
-out_recv:
-       rc = chip->vendor.recv(chip, (u8 *) buf, bufsiz);
-       if (rc < 0)
-               dev_err(chip->dev,
-                       "tpm_transmit: tpm_recv: error %zd\n", rc);
-out:
-       mutex_unlock(&chip->tpm_mutex);
-       return rc;
-}
-
-#define TPM_DIGEST_SIZE 20     /* bytes in a PCR value / extend hash */
-#define TPM_RET_CODE_IDX 6     /* NOTE(review): no user in this part of the file */
-
-/* Capability areas for the getcap command, stored big-endian. */
-enum tpm_capabilities {
-       TPM_CAP_FLAG = cpu_to_be32(4),
-       TPM_CAP_PROP = cpu_to_be32(5),
-       CAP_VERSION_1_1 = cpu_to_be32(0x06),
-       CAP_VERSION_1_2 = cpu_to_be32(0x1A)
-};
-
-/* Sub-capability selectors handed to tpm_getcap(), stored big-endian. */
-enum tpm_sub_capabilities {
-       TPM_CAP_PROP_PCR = cpu_to_be32(0x101),
-       TPM_CAP_PROP_MANUFACTURER = cpu_to_be32(0x103),
-       TPM_CAP_FLAG_PERM = cpu_to_be32(0x108),
-       TPM_CAP_FLAG_VOL = cpu_to_be32(0x109),
-       TPM_CAP_PROP_OWNER = cpu_to_be32(0x111),
-       TPM_CAP_PROP_TIS_TIMEOUT = cpu_to_be32(0x115),
-       TPM_CAP_PROP_TIS_DURATION = cpu_to_be32(0x120),
-
-};
-
-/*
- * Send @cmd through tpm_transmit() and decode the outcome.
- *
- * Returns the negative value from tpm_transmit() on failure, -EFAULT
- * when the response is shorter than TPM_HEADER_SIZE, and otherwise the
- * return code of the response in CPU byte order (0 means success).  A
- * non-zero return code is logged together with @desc unless @desc is
- * NULL.
- */
-static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd,
-                           int len, const char *desc)
-{
-       int tpm_err;
-
-       len = tpm_transmit(chip, (u8 *)cmd, len);
-       if (len < 0)
-               return len;
-       if (len < TPM_HEADER_SIZE)
-               return -EFAULT;
-
-       tpm_err = be32_to_cpu(cmd->header.out.return_code);
-       if (desc && tpm_err != 0)
-               dev_err(chip->dev, "A TPM error (%d) occurred %s\n",
-                       tpm_err, desc);
-
-       return tpm_err;
-}
-
-/*
- * TPM_INTERNAL_RESULT_SIZE is the buffer length the getcap and startup
- * helpers in this file pass to transmit_cmd().  The tag and the two
- * ordinals below are stored already converted to big-endian.
- */
-#define TPM_INTERNAL_RESULT_SIZE 200
-#define TPM_TAG_RQU_COMMAND cpu_to_be16(193)
-#define TPM_ORD_GET_CAP cpu_to_be32(101)
-#define TPM_ORD_GET_RANDOM cpu_to_be32(70)
-
-/* Template header for getcap commands; callers copy it and adjust it. */
-static const struct tpm_input_header tpm_getcap_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(22),
-       .ordinal = TPM_ORD_GET_CAP
-};
-
-/*
- * tpm_getcap - query a capability from the chip bound to @dev
- * @dev:       device whose drvdata is the tpm_chip
- * @subcap_id: big-endian capability or sub-capability selector
- * @cap:       filled from the response when the command succeeds
- * @desc:      description passed on to transmit_cmd() for error logging
- *
- * For CAP_VERSION_1_1 and CAP_VERSION_1_2 the selector is sent as the
- * capability itself with no sub-capability, so the command is one
- * __be32 shorter.  TPM_CAP_FLAG_PERM and TPM_CAP_FLAG_VOL are sent under
- * TPM_CAP_FLAG, every other selector under TPM_CAP_PROP.
- *
- * Returns the transmit_cmd() result; 0 means success.
- */
-ssize_t tpm_getcap(struct device *dev, __be32 subcap_id, cap_t *cap,
-                  const char *desc)
-{
-       struct tpm_cmd_t tpm_cmd;
-       int rc;
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       tpm_cmd.header.in = tpm_getcap_header;
-       if (subcap_id == CAP_VERSION_1_1 || subcap_id == CAP_VERSION_1_2) {
-               u32 length = be32_to_cpu(tpm_cmd.header.in.length);
-
-               tpm_cmd.params.getcap_in.cap = subcap_id;
-               /*subcap field not necessary */
-               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(0);
-               /* Shrink the length in CPU order, then convert it back */
-               tpm_cmd.header.in.length =
-                       cpu_to_be32(length - sizeof(__be32));
-       } else {
-               if (subcap_id == TPM_CAP_FLAG_PERM ||
-                   subcap_id == TPM_CAP_FLAG_VOL)
-                       tpm_cmd.params.getcap_in.cap = TPM_CAP_FLAG;
-               else
-                       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-               tpm_cmd.params.getcap_in.subcap = subcap_id;
-       }
-       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc);
-       if (!rc)
-               *cap = tpm_cmd.params.getcap_out.cap;
-       return rc;
-}
-
-/*
- * Issue a TPM_CAP_PROP_TIS_TIMEOUT getcap command to @chip.  The reply
- * is not examined and the transmit_cmd() result is deliberately
- * ignored.
- */
-void tpm_gen_interrupt(struct tpm_chip *chip)
-{
-       struct  tpm_cmd_t tpm_cmd;
-
-       tpm_cmd.header.in = tpm_getcap_header;
-       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-
-       transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-                    "attempting to determine the timeouts");
-}
-EXPORT_SYMBOL_GPL(tpm_gen_interrupt);
-
-/* Startup ordinal and startup types, stored already big-endian. */
-#define TPM_ORD_STARTUP cpu_to_be32(153)
-#define TPM_ST_CLEAR cpu_to_be16(1)
-#define TPM_ST_STATE cpu_to_be16(2)
-#define TPM_ST_DEACTIVATED cpu_to_be16(3)
-/* Template header copied into every startup command by tpm_startup(). */
-static const struct tpm_input_header tpm_startup_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(12),
-       .ordinal = TPM_ORD_STARTUP
-};
-
-/*
- * Send a startup command with the given big-endian @startup_type and
- * return the transmit_cmd() result.
- */
-static int tpm_startup(struct tpm_chip *chip, __be16 startup_type)
-{
-       struct tpm_cmd_t cmd;
-
-       cmd.header.in = tpm_startup_header;
-       cmd.params.startup_in.startup_type = startup_type;
-
-       return transmit_cmd(chip, &cmd, TPM_INTERNAL_RESULT_SIZE,
-                           "attempting to start the TPM");
-}
-
-/*
- * tpm_get_timeouts - read timeout and duration values from the chip
- * @chip: TPM chip to query
- *
- * Queries TPM_CAP_PROP_TIS_TIMEOUT and stores every non-zero value in
- * chip->vendor.timeout_a..d, then queries TPM_CAP_PROP_TIS_DURATION and
- * stores the three values in chip->vendor.duration[].  If the first
- * query answers TPM_ERR_INVALID_POSTINIT, a TPM_ST_CLEAR startup is
- * issued and the query is repeated.  A failing timeout query is logged
- * and the duration query is still attempted.
- *
- * Returns 0 on success, -EINVAL when a response has an unexpected
- * length, or the non-zero result of the failing command.
- */
-int tpm_get_timeouts(struct tpm_chip *chip)
-{
-       struct tpm_cmd_t tpm_cmd;
-       struct timeout_t *timeout_cap;
-       struct duration_t *duration_cap;
-       ssize_t rc;
-       u32 timeout;
-       unsigned int scale = 1;
-
-       tpm_cmd.header.in = tpm_getcap_header;
-       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL);
-
-       if (rc == TPM_ERR_INVALID_POSTINIT) {
-               /* The TPM is not started, we are the first to talk to it.
-                  Execute a startup command. */
-               dev_info(chip->dev, "Issuing TPM_STARTUP\n");
-               if (tpm_startup(chip, TPM_ST_CLEAR))
-                       return rc;
-
-               tpm_cmd.header.in = tpm_getcap_header;
-               tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-               tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-               tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-               rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-                                 NULL);
-       }
-       if (rc) {
-               dev_err(chip->dev,
-                       "A TPM error (%zd) occurred attempting to determine the timeouts\n",
-                       rc);
-               goto duration;
-       }
-
-       /* Expect header + one u32 + the four timeout values */
-       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
-           be32_to_cpu(tpm_cmd.header.out.length)
-           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32))
-               return -EINVAL;
-
-       timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout;
-       /* Don't overwrite default if value is 0 */
-       timeout = be32_to_cpu(timeout_cap->a);
-       if (timeout && timeout < 1000) {
-               /* timeouts in msec rather usec */
-               scale = 1000;
-               chip->vendor.timeout_adjusted = true;
-       }
-       if (timeout)
-               chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale);
-       timeout = be32_to_cpu(timeout_cap->b);
-       if (timeout)
-               chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale);
-       timeout = be32_to_cpu(timeout_cap->c);
-       if (timeout)
-               chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale);
-       timeout = be32_to_cpu(timeout_cap->d);
-       if (timeout)
-               chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale);
-
-duration:
-       tpm_cmd.header.in = tpm_getcap_header;
-       tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-       tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-       tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION;
-
-       rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-                       "attempting to determine the durations");
-       if (rc)
-               return rc;
-
-       /* Expect header + one u32 + the three duration values */
-       if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
-           be32_to_cpu(tpm_cmd.header.out.length)
-           != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
-               return -EINVAL;
-
-       duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
-       chip->vendor.duration[TPM_SHORT] =
-           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
-       chip->vendor.duration[TPM_MEDIUM] =
-           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium));
-       chip->vendor.duration[TPM_LONG] =
-           usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long));
-
-       /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
-        * value wrong and apparently reports msecs rather than usecs. So we
-        * fix up the resulting too-small TPM_SHORT value to make things work.
-        * We also scale the TPM_MEDIUM and -_LONG values by 1000.
-        */
-       if (chip->vendor.duration[TPM_SHORT] < (HZ / 100)) {
-               chip->vendor.duration[TPM_SHORT] = HZ;
-               chip->vendor.duration[TPM_MEDIUM] *= 1000;
-               chip->vendor.duration[TPM_LONG] *= 1000;
-               chip->vendor.duration_adjusted = true;
-               dev_info(chip->dev, "Adjusting TPM timeout parameters.\n");
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(tpm_get_timeouts);
-
-/*
- * The ordinal is kept in CPU byte order because it is also passed to
- * tpm_calc_ordinal_duration(); it is converted where the header is built.
- */
-#define TPM_ORD_CONTINUE_SELFTEST 83
-#define CONTINUE_SELFTEST_RESULT_SIZE 10
-
-/* Template header; only ever copied into a command, hence const. */
-static const struct tpm_input_header continue_selftest_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(10),
-       .ordinal = cpu_to_be32(TPM_ORD_CONTINUE_SELFTEST),
-};
-
-/**
- * tpm_continue_selftest -- send the continue-selftest command
- * @chip: TPM chip to use
- *
- * Returns 0 on success, < 0 in case of fatal error or a value > 0 representing
- * a TPM error code.
- */
-static int tpm_continue_selftest(struct tpm_chip *chip)
-{
-       struct tpm_cmd_t selftest_cmd;
-
-       selftest_cmd.header.in = continue_selftest_header;
-
-       return transmit_cmd(chip, &selftest_cmd,
-                           CONTINUE_SELFTEST_RESULT_SIZE,
-                           "continue selftest");
-}
-
-/*
- * Attribute show handler: writes the negated "disable" permanent flag
- * as a decimal number followed by a newline.  Returns the number of
- * characters written, or 0 when the capability query fails.
- */
-ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr,
-                       char *buf)
-{
-       cap_t flags;
-
-       if (tpm_getcap(dev, TPM_CAP_FLAG_PERM, &flags,
-                      "attempting to determine the permanent enabled state"))
-               return 0;
-
-       return sprintf(buf, "%d\n", !flags.perm_flags.disable);
-}
-EXPORT_SYMBOL_GPL(tpm_show_enabled);
-
-/*
- * Attribute show handler: writes the negated "deactivated" permanent
- * flag as a decimal number followed by a newline.  Returns the number
- * of characters written, or 0 when the capability query fails.
- */
-ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr,
-                       char *buf)
-{
-       cap_t flags;
-
-       if (tpm_getcap(dev, TPM_CAP_FLAG_PERM, &flags,
-                      "attempting to determine the permanent active state"))
-               return 0;
-
-       return sprintf(buf, "%d\n", !flags.perm_flags.deactivated);
-}
-EXPORT_SYMBOL_GPL(tpm_show_active);
-
-/*
- * Attribute show handler: writes the "owned" capability value as a
- * decimal number followed by a newline.  Returns the number of
- * characters written, or 0 when the capability query fails.
- */
-ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr,
-                       char *buf)
-{
-       cap_t owner;
-
-       if (tpm_getcap(dev, TPM_CAP_PROP_OWNER, &owner,
-                      "attempting to determine the owner state"))
-               return 0;
-
-       return sprintf(buf, "%d\n", owner.owned);
-}
-EXPORT_SYMBOL_GPL(tpm_show_owned);
-
-/*
- * Attribute show handler: writes the "deactivated" volatile flag as a
- * decimal number followed by a newline.  Returns the number of
- * characters written, or 0 when the capability query fails.
- */
-ssize_t tpm_show_temp_deactivated(struct device * dev,
-                               struct device_attribute * attr, char *buf)
-{
-       cap_t flags;
-
-       if (tpm_getcap(dev, TPM_CAP_FLAG_VOL, &flags,
-                      "attempting to determine the temporary state"))
-               return 0;
-
-       return sprintf(buf, "%d\n", flags.stclear_flags.deactivated);
-}
-EXPORT_SYMBOL_GPL(tpm_show_temp_deactivated);
-
-/*
- * tpm_chip_find_get - return tpm_chip for given chip number
- *
- * Walks tpm_chip_list under rcu_read_lock() and returns the first chip
- * that matches @chip_num (any chip when @chip_num is TPM_ANY_NUM) and
- * for which try_module_get() on the driver's owner succeeds.  Returns
- * NULL when there is no such chip.
- */
-static struct tpm_chip *tpm_chip_find_get(int chip_num)
-{
-       struct tpm_chip *cur, *found = NULL;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(cur, &tpm_chip_list, list) {
-               bool wanted = chip_num == TPM_ANY_NUM ||
-                             chip_num == cur->dev_num;
-
-               if (wanted && try_module_get(cur->dev->driver->owner)) {
-                       found = cur;
-                       break;
-               }
-       }
-       rcu_read_unlock();
-
-       return found;
-}
-
-#define TPM_ORDINAL_PCRREAD cpu_to_be32(21)
-#define READ_PCR_RESULT_SIZE 30
-/* Template header; only ever copied into a command, hence const. */
-static const struct tpm_input_header pcrread_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(14),
-       .ordinal = TPM_ORDINAL_PCRREAD
-};
-
-/*
- * Read PCR @pcr_idx from @chip.  On success the TPM_DIGEST_SIZE-byte
- * value is copied to @res_buf unless @res_buf is NULL.  Returns the
- * transmit_cmd() result; 0 means success.
- */
-static int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
-{
-       int rc;
-       struct tpm_cmd_t cmd;
-
-       cmd.header.in = pcrread_header;
-       cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx);
-       rc = transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE,
-                         "attempting to read a pcr value");
-
-       /* A NULL res_buf means the caller does not want the value */
-       if (rc == 0 && res_buf)
-               memcpy(res_buf, cmd.params.pcrread_out.pcr_result,
-                      TPM_DIGEST_SIZE);
-       return rc;
-}
-
-/**
- * tpm_pcr_read - read a pcr value
- * @chip_num:  tpm idx # or ANY
- * @pcr_idx:   pcr idx to retrieve
- * @res_buf:   TPM_PCR value
- *             size of res_buf is 20 bytes (or NULL if you don't care)
- *
- * The TPM driver should be built-in, but for whatever reason it
- * isn't, protect against the chip disappearing, by incrementing
- * the module usage count.
- *
- * Returns -ENODEV when no matching chip is found, otherwise the result
- * of the read command.
- */
-int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf)
-{
-       struct tpm_chip *chip;
-       int rc;
-
-       chip = tpm_chip_find_get(chip_num);
-       if (chip == NULL)
-               return -ENODEV;
-       rc = __tpm_pcr_read(chip, pcr_idx, res_buf);
-       tpm_chip_put(chip);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_pcr_read);
-
-/**
- * tpm_pcr_extend - extend pcr value with hash
- * @chip_num:  tpm idx # or ANY
- * @pcr_idx:   pcr idx to extend
- * @hash:      hash value used to extend pcr value
- *
- * The TPM driver should be built-in, but for whatever reason it
- * isn't, protect against the chip disappearing, by incrementing
- * the module usage count.
- */
-#define TPM_ORD_PCR_EXTEND cpu_to_be32(20)
-#define EXTEND_PCR_RESULT_SIZE 34
-/*
- * Template header, copied into each extend command.
- * NOTE(review): made const on the assumption that users outside this
- * part of the file also only copy it - confirm.
- */
-static const struct tpm_input_header pcrextend_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(34),
-       .ordinal = TPM_ORD_PCR_EXTEND
-};
-
-/*
- * Looks up the chip, sends an extend command for PCR @pcr_idx carrying
- * the TPM_DIGEST_SIZE bytes at @hash, and drops the chip reference.
- * Returns -ENODEV when no matching chip is found, otherwise the
- * transmit_cmd() result.
- */
-int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash)
-{
-       struct tpm_chip *chip = tpm_chip_find_get(chip_num);
-       struct tpm_cmd_t extend_cmd;
-       int rc;
-
-       if (!chip)
-               return -ENODEV;
-
-       extend_cmd.header.in = pcrextend_header;
-       extend_cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
-       memcpy(extend_cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
-
-       rc = transmit_cmd(chip, &extend_cmd, EXTEND_PCR_RESULT_SIZE,
-                         "attempting extend a PCR value");
-       tpm_chip_put(chip);
-
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_pcr_extend);
-
-/**
- * tpm_do_selftest - have the TPM continue its selftest and wait until it
- *                   can receive further commands
- * @chip: TPM chip to use
- *
- * After sending the continue-selftest command, PCR 0 is read repeatedly,
- * sleeping 100 ms between attempts, until the chip stops answering
- * TPM_WARN_DOING_SELFTEST or the selftest duration is used up.  At least
- * one read attempt is always made.
- *
- * Returns 0 on success, < 0 in case of fatal error or a value > 0 representing
- * a TPM error code.
- */
-int tpm_do_selftest(struct tpm_chip *chip)
-{
-       int rc;
-       unsigned int loops;
-       unsigned int delay_msec = 100;
-       unsigned long duration;
-       struct tpm_cmd_t cmd;
-
-       duration = tpm_calc_ordinal_duration(chip,
-                                            TPM_ORD_CONTINUE_SELFTEST);
-
-       loops = jiffies_to_msecs(duration) / delay_msec;
-       /*
-        * The loop below ends on "--loops > 0" with an unsigned counter,
-        * so a starting value of 0 would wrap around instead of stopping.
-        */
-       if (loops == 0)
-               loops = 1;
-
-       rc = tpm_continue_selftest(chip);
-       /* This may fail if there was no TPM driver during a suspend/resume
-        * cycle; some may return 10 (BAD_ORDINAL), others 28 (FAILEDSELFTEST)
-        */
-       if (rc)
-               return rc;
-
-       do {
-               /* Attempt to read a PCR value */
-               cmd.header.in = pcrread_header;
-               cmd.params.pcrread_in.pcr_idx = cpu_to_be32(0);
-               rc = tpm_transmit(chip, (u8 *) &cmd, READ_PCR_RESULT_SIZE);
-               /* Some buggy TPMs will not respond to tpm_tis_ready() for
-                * around 300ms while the self test is ongoing, keep trying
-                * until the self test duration expires. */
-               if (rc == -ETIME) {
-                       dev_info(chip->dev, HW_ERR "TPM command timed out during continue self test\n");
-                       msleep(delay_msec);
-                       continue;
-               }
-
-               if (rc < TPM_HEADER_SIZE)
-                       return -EFAULT;
-
-               rc = be32_to_cpu(cmd.header.out.return_code);
-               if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) {
-                       dev_info(chip->dev,
-                                "TPM is disabled/deactivated (0x%X)\n", rc);
-                       /* TPM is disabled and/or deactivated; driver can
-                        * proceed and TPM does handle commands for
-                        * suspend/resume correctly
-                        */
-                       return 0;
-               }
-               if (rc != TPM_WARN_DOING_SELFTEST)
-                       return rc;
-               msleep(delay_msec);
-       } while (--loops > 0);
-
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_do_selftest);
-
-/*
- * Looks up the chip for @chip_num, sends the @buflen-byte command at
- * @cmd through transmit_cmd() and drops the chip reference.  Returns
- * -ENODEV when no matching chip is found, otherwise the transmit_cmd()
- * result.
- */
-int tpm_send(u32 chip_num, void *cmd, size_t buflen)
-{
-       struct tpm_chip *chip = tpm_chip_find_get(chip_num);
-       int rc;
-
-       if (!chip)
-               return -ENODEV;
-
-       rc = transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd");
-       tpm_chip_put(chip);
-
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_send);
-
-/*
- * Attribute show handler: asks the chip for its number of PCRs and
- * writes one "PCR-nn: " line of hex bytes per register.  Output stops
- * at the first register that cannot be read.  Returns the number of
- * characters written, or 0 when the PCR count cannot be determined.
- */
-ssize_t tpm_show_pcrs(struct device *dev, struct device_attribute *attr,
-                     char *buf)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-       u8 digest[TPM_DIGEST_SIZE];
-       char *out = buf;
-       cap_t cap;
-       int pcr, byte, num_pcrs;
-
-       if (tpm_getcap(dev, TPM_CAP_PROP_PCR, &cap,
-                      "attempting to determine the number of PCRS"))
-               return 0;
-
-       num_pcrs = be32_to_cpu(cap.num_pcrs);
-       for (pcr = 0; pcr < num_pcrs; pcr++) {
-               if (__tpm_pcr_read(chip, pcr, digest))
-                       break;
-
-               out += sprintf(out, "PCR-%02d: ", pcr);
-               for (byte = 0; byte < TPM_DIGEST_SIZE; byte++)
-                       out += sprintf(out, "%02X ", digest[byte]);
-               out += sprintf(out, "\n");
-       }
-
-       return out - buf;
-}
-EXPORT_SYMBOL_GPL(tpm_show_pcrs);
-
-#define  READ_PUBEK_RESULT_SIZE 314
-#define TPM_ORD_READPUBEK cpu_to_be32(124)
-/* Template header; only ever copied into a command, hence const. */
-static const struct tpm_input_header tpm_readpubek_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(30),
-       .ordinal = TPM_ORD_READPUBEK
-};
-
-/*
- * Attribute show handler: reads the public endorsement key and prints
- * its fields.  Returns the number of characters written, which is 0
- * when the read command fails.
- */
-ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr,
-                      char *buf)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-       struct tpm_cmd_t tpm_cmd;
-       const u8 *modulus;
-       char *str = buf;
-       u8 *data;
-       int i;
-
-       tpm_cmd.header.in = tpm_readpubek_header;
-       if (transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE,
-                        "attempting to read the PUBEK"))
-               return str - buf;
-
-       /*
-        * Offsets used below, relative to readpubek_out_buffer:
-        *   0..3    algorithm
-        *   4..5    encscheme
-        *   6..7    sigscheme
-        *   12..23  parameters
-        *   24..27  modulus length, big-endian
-        *   28..283 modulus (256 bytes)
-        */
-       data = tpm_cmd.params.readpubek_out_buffer;
-       modulus = data + 28;
-
-       str += sprintf(str,
-                   "Algorithm: %02X %02X %02X %02X\n"
-                   "Encscheme: %02X %02X\n"
-                   "Sigscheme: %02X %02X\n"
-                   "Parameters: %02X %02X %02X %02X "
-                   "%02X %02X %02X %02X "
-                   "%02X %02X %02X %02X\n"
-                   "Modulus length: %d\n"
-                   "Modulus:\n",
-                   data[0], data[1], data[2], data[3],
-                   data[4], data[5],
-                   data[6], data[7],
-                   data[12], data[13], data[14], data[15],
-                   data[16], data[17], data[18], data[19],
-                   data[20], data[21], data[22], data[23],
-                   be32_to_cpu(*((__be32 *) (data + 24))));
-
-       /* Sixteen modulus bytes per output line */
-       for (i = 0; i < 256; i++) {
-               str += sprintf(str, "%02X ", modulus[i]);
-               if (i % 16 == 15)
-                       str += sprintf(str, "\n");
-       }
-
-       return str - buf;
-}
-EXPORT_SYMBOL_GPL(tpm_show_pubek);
-
-
-/*
- * Attribute show handler: prints the manufacturer id followed by the
- * TCG and firmware versions taken from the CAP_VERSION_1_1 reply.
- * Returns the number of characters written, or 0 when either
- * capability query fails.
- */
-ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr,
-                     char *buf)
-{
-       char *out = buf;
-       cap_t cap;
-
-       if (tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap,
-                      "attempting to determine the manufacturer"))
-               return 0;
-       out += sprintf(out, "Manufacturer: 0x%x\n",
-                      be32_to_cpu(cap.manufacturer_id));
-
-       if (tpm_getcap(dev, CAP_VERSION_1_1, &cap,
-                      "attempting to determine the 1.1 version"))
-               return 0;
-       out += sprintf(out,
-                      "TCG version: %d.%d\nFirmware version: %d.%d\n",
-                      cap.tpm_version.Major, cap.tpm_version.Minor,
-                      cap.tpm_version.revMajor, cap.tpm_version.revMinor);
-
-       return out - buf;
-}
-EXPORT_SYMBOL_GPL(tpm_show_caps);
-
-ssize_t tpm_show_caps_1_2(struct device * dev,
-                         struct device_attribute * attr, char *buf)
-{
-       cap_t cap;
-       ssize_t rc;
-       char *str = buf;
-
-       rc = tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap,
-                       "attempting to determine the manufacturer");
-       if (rc)
-               return 0;
-       str += sprintf(str, "Manufacturer: 0x%x\n",
-                      be32_to_cpu(cap.manufacturer_id));
-       rc = tpm_getcap(dev, CAP_VERSION_1_2, &cap,
-                        "attempting to determine the 1.2 version");
-       if (rc)
-               return 0;
-       str += sprintf(str,
-                      "TCG version: %d.%d\nFirmware version: %d.%d\n",
-                      cap.tpm_version_1_2.Major, cap.tpm_version_1_2.Minor,
-                      cap.tpm_version_1_2.revMajor,
-                      cap.tpm_version_1_2.revMinor);
-       return str - buf;
-}
-EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
-
-ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       if (chip->vendor.duration[TPM_LONG] == 0)
-               return 0;
-
-       return sprintf(buf, "%d %d %d [%s]\n",
-                      jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
-                      jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
-                      jiffies_to_usecs(chip->vendor.duration[TPM_LONG]),
-                      chip->vendor.duration_adjusted
-                      ? "adjusted" : "original");
-}
-EXPORT_SYMBOL_GPL(tpm_show_durations);
-
-ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       return sprintf(buf, "%d %d %d %d [%s]\n",
-                      jiffies_to_usecs(chip->vendor.timeout_a),
-                      jiffies_to_usecs(chip->vendor.timeout_b),
-                      jiffies_to_usecs(chip->vendor.timeout_c),
-                      jiffies_to_usecs(chip->vendor.timeout_d),
-                      chip->vendor.timeout_adjusted
-                      ? "adjusted" : "original");
-}
-EXPORT_SYMBOL_GPL(tpm_show_timeouts);
-
-ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
-                       const char *buf, size_t count)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-       if (chip == NULL)
-               return 0;
-
-       chip->vendor.cancel(chip);
-       return count;
-}
-EXPORT_SYMBOL_GPL(tpm_store_cancel);
-
-static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask, bool check_cancel,
-                                  bool *canceled)
-{
-       u8 status = chip->vendor.status(chip);
-
-       *canceled = false;
-       if ((status & mask) == mask)
-               return true;
-       if (check_cancel && chip->vendor.req_canceled(chip, status)) {
-               *canceled = true;
-               return true;
-       }
-       return false;
-}
-
-int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
-                     wait_queue_head_t *queue, bool check_cancel)
-{
-       unsigned long stop;
-       long rc;
-       u8 status;
-       bool canceled = false;
-
-       /* check current status */
-       status = chip->vendor.status(chip);
-       if ((status & mask) == mask)
-               return 0;
-
-       stop = jiffies + timeout;
-
-       if (chip->vendor.irq) {
-again:
-               timeout = stop - jiffies;
-               if ((long)timeout <= 0)
-                       return -ETIME;
-               rc = wait_event_interruptible_timeout(*queue,
-                       wait_for_tpm_stat_cond(chip, mask, check_cancel,
-                                              &canceled),
-                       timeout);
-               if (rc > 0) {
-                       if (canceled)
-                               return -ECANCELED;
-                       return 0;
-               }
-               if (rc == -ERESTARTSYS && freezing(current)) {
-                       clear_thread_flag(TIF_SIGPENDING);
-                       goto again;
-               }
-       } else {
-               do {
-                       msleep(TPM_TIMEOUT);
-                       status = chip->vendor.status(chip);
-                       if ((status & mask) == mask)
-                               return 0;
-               } while (time_before(jiffies, stop));
-       }
-       return -ETIME;
-}
-EXPORT_SYMBOL_GPL(wait_for_tpm_stat);
-/*
- * Device file system interface to the TPM
- *
- * It's assured that the chip will be opened just once,
- * by the check of is_open variable, which is protected
- * by driver_lock.
- */
-int tpm_open(struct inode *inode, struct file *file)
-{
-       int minor = iminor(inode);
-       struct tpm_chip *chip = NULL, *pos;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(pos, &tpm_chip_list, list) {
-               if (pos->vendor.miscdev.minor == minor) {
-                       chip = pos;
-                       get_device(chip->dev);
-                       break;
-               }
-       }
-       rcu_read_unlock();
-
-       if (!chip)
-               return -ENODEV;
-
-       if (test_and_set_bit(0, &chip->is_open)) {
-               dev_dbg(chip->dev, "Another process owns this TPM\n");
-               put_device(chip->dev);
-               return -EBUSY;
-       }
-
-       chip->data_buffer = kzalloc(TPM_BUFSIZE, GFP_KERNEL);
-       if (chip->data_buffer == NULL) {
-               clear_bit(0, &chip->is_open);
-               put_device(chip->dev);
-               return -ENOMEM;
-       }
-
-       atomic_set(&chip->data_pending, 0);
-
-       file->private_data = chip;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(tpm_open);
-
-/*
- * Called on file close
- */
-int tpm_release(struct inode *inode, struct file *file)
-{
-       struct tpm_chip *chip = file->private_data;
-
-       del_singleshot_timer_sync(&chip->user_read_timer);
-       flush_work(&chip->work);
-       file->private_data = NULL;
-       atomic_set(&chip->data_pending, 0);
-       kzfree(chip->data_buffer);
-       clear_bit(0, &chip->is_open);
-       put_device(chip->dev);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(tpm_release);
-
-ssize_t tpm_write(struct file *file, const char __user *buf,
-                 size_t size, loff_t *off)
-{
-       struct tpm_chip *chip = file->private_data;
-       size_t in_size = size;
-       ssize_t out_size;
-
-       /* cannot perform a write until the read has cleared
-          either via tpm_read or a user_read_timer timeout.
-          This also prevents splitted buffered writes from blocking here.
-       */
-       if (atomic_read(&chip->data_pending) != 0)
-               return -EBUSY;
-
-       if (in_size > TPM_BUFSIZE)
-               return -E2BIG;
-
-       mutex_lock(&chip->buffer_mutex);
-
-       if (copy_from_user
-           (chip->data_buffer, (void __user *) buf, in_size)) {
-               mutex_unlock(&chip->buffer_mutex);
-               return -EFAULT;
-       }
-
-       /* atomic tpm command send and result receive */
-       out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
-       if (out_size < 0) {
-               mutex_unlock(&chip->buffer_mutex);
-               return out_size;
-       }
-
-       atomic_set(&chip->data_pending, out_size);
-       mutex_unlock(&chip->buffer_mutex);
-
-       /* Set a timeout by which the reader must come claim the result */
-       mod_timer(&chip->user_read_timer, jiffies + (60 * HZ));
-
-       return in_size;
-}
-EXPORT_SYMBOL_GPL(tpm_write);
-
-ssize_t tpm_read(struct file *file, char __user *buf,
-                size_t size, loff_t *off)
-{
-       struct tpm_chip *chip = file->private_data;
-       ssize_t ret_size;
-       int rc;
-
-       del_singleshot_timer_sync(&chip->user_read_timer);
-       flush_work(&chip->work);
-       ret_size = atomic_read(&chip->data_pending);
-       if (ret_size > 0) {     /* relay data */
-               ssize_t orig_ret_size = ret_size;
-               if (size < ret_size)
-                       ret_size = size;
-
-               mutex_lock(&chip->buffer_mutex);
-               rc = copy_to_user(buf, chip->data_buffer, ret_size);
-               memset(chip->data_buffer, 0, orig_ret_size);
-               if (rc)
-                       ret_size = -EFAULT;
-
-               mutex_unlock(&chip->buffer_mutex);
-       }
-
-       atomic_set(&chip->data_pending, 0);
-
-       return ret_size;
-}
-EXPORT_SYMBOL_GPL(tpm_read);
-
-void tpm_remove_hardware(struct device *dev)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       if (chip == NULL) {
-               dev_err(dev, "No device data found\n");
-               return;
-       }
-
-       spin_lock(&driver_lock);
-       list_del_rcu(&chip->list);
-       spin_unlock(&driver_lock);
-       synchronize_rcu();
-
-       misc_deregister(&chip->vendor.miscdev);
-       sysfs_remove_group(&dev->kobj, chip->vendor.attr_group);
-       tpm_remove_ppi(&dev->kobj);
-       tpm_bios_log_teardown(chip->bios_dir);
-
-       /* write it this way to be explicit (chip->dev == dev) */
-       put_device(chip->dev);
-}
-EXPORT_SYMBOL_GPL(tpm_remove_hardware);
-
-#define TPM_ORD_SAVESTATE cpu_to_be32(152)
-#define SAVESTATE_RESULT_SIZE 10
-
-static struct tpm_input_header savestate_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(10),
-       .ordinal = TPM_ORD_SAVESTATE
-};
-
-/*
- * We are about to suspend. Save the TPM state
- * so that it can be restored.
- */
-int tpm_pm_suspend(struct device *dev)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-       struct tpm_cmd_t cmd;
-       int rc, try;
-
-       u8 dummy_hash[TPM_DIGEST_SIZE] = { 0 };
-
-       if (chip == NULL)
-               return -ENODEV;
-
-       /* for buggy tpm, flush pcrs with extend to selected dummy */
-       if (tpm_suspend_pcr) {
-               cmd.header.in = pcrextend_header;
-               cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(tpm_suspend_pcr);
-               memcpy(cmd.params.pcrextend_in.hash, dummy_hash,
-                      TPM_DIGEST_SIZE);
-               rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
-                                 "extending dummy pcr before suspend");
-       }
-
-       /* now do the actual savestate */
-       for (try = 0; try < TPM_RETRY; try++) {
-               cmd.header.in = savestate_header;
-               rc = transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL);
-
-               /*
-                * If the TPM indicates that it is too busy to respond to
-                * this command then retry before giving up.  It can take
-                * several seconds for this TPM to be ready.
-                *
-                * This can happen if the TPM has already been sent the
-                * SaveState command before the driver has loaded.  TCG 1.2
-                * specification states that any communication after SaveState
-                * may cause the TPM to invalidate previously saved state.
-                */
-               if (rc != TPM_WARN_RETRY)
-                       break;
-               msleep(TPM_TIMEOUT_RETRY);
-       }
-
-       if (rc)
-               dev_err(chip->dev,
-                       "Error (%d) sending savestate before suspend\n", rc);
-       else if (try > 0)
-               dev_warn(chip->dev, "TPM savestate took %dms\n",
-                        try * TPM_TIMEOUT_RETRY);
-
-       return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_pm_suspend);
-
-/*
- * Resume from a power safe. The BIOS already restored
- * the TPM state.
- */
-int tpm_pm_resume(struct device *dev)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       if (chip == NULL)
-               return -ENODEV;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(tpm_pm_resume);
-
-#define TPM_GETRANDOM_RESULT_SIZE      18
-static struct tpm_input_header tpm_getrandom_header = {
-       .tag = TPM_TAG_RQU_COMMAND,
-       .length = cpu_to_be32(14),
-       .ordinal = TPM_ORD_GET_RANDOM
-};
-
-/**
- * tpm_get_random() - Get random bytes from the tpm's RNG
- * @chip_num: A specific chip number for the request or TPM_ANY_NUM
- * @out: destination buffer for the random bytes
- * @max: the max number of bytes to write to @out
- *
- * Returns < 0 on error and the number of bytes read on success
- */
-int tpm_get_random(u32 chip_num, u8 *out, size_t max)
-{
-       struct tpm_chip *chip;
-       struct tpm_cmd_t tpm_cmd;
-       u32 recd, num_bytes = min_t(u32, max, TPM_MAX_RNG_DATA);
-       int err, total = 0, retries = 5;
-       u8 *dest = out;
-
-       chip = tpm_chip_find_get(chip_num);
-       if (chip == NULL)
-               return -ENODEV;
-
-       if (!out || !num_bytes || max > TPM_MAX_RNG_DATA)
-               return -EINVAL;
-
-       do {
-               tpm_cmd.header.in = tpm_getrandom_header;
-               tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes);
-
-               err = transmit_cmd(chip, &tpm_cmd,
-                                  TPM_GETRANDOM_RESULT_SIZE + num_bytes,
-                                  "attempting get random");
-               if (err)
-                       break;
-
-               recd = be32_to_cpu(tpm_cmd.params.getrandom_out.rng_data_len);
-               memcpy(dest, tpm_cmd.params.getrandom_out.rng_data, recd);
-
-               dest += recd;
-               total += recd;
-               num_bytes -= recd;
-       } while (retries-- && total < max);
-
-       return total ? total : -EIO;
-}
-EXPORT_SYMBOL_GPL(tpm_get_random);
-
-/* In case vendor provided release function, call it too.*/
-
-void tpm_dev_vendor_release(struct tpm_chip *chip)
-{
-       if (!chip)
-               return;
-
-       if (chip->vendor.release)
-               chip->vendor.release(chip->dev);
-
-       clear_bit(chip->dev_num, dev_mask);
-       kfree(chip->vendor.miscdev.name);
-}
-EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
-
-
-/*
- * Once all references to platform device are down to 0,
- * release all allocated structures.
- */
-void tpm_dev_release(struct device *dev)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       if (!chip)
-               return;
-
-       tpm_dev_vendor_release(chip);
-
-       chip->release(dev);
-       kfree(chip);
-}
-EXPORT_SYMBOL_GPL(tpm_dev_release);
-
-/*
- * Called from tpm_<specific>.c probe function only for devices 
- * the driver has determined it should claim.  Prior to calling
- * this function the specific probe function has called pci_enable_device
- * upon errant exit from this function specific probe function should call
- * pci_disable_device
- */
-struct tpm_chip *tpm_register_hardware(struct device *dev,
-                                       const struct tpm_vendor_specific *entry)
-{
-#define DEVNAME_SIZE 7
-
-       char *devname;
-       struct tpm_chip *chip;
-
-       /* Driver specific per-device data */
-       chip = kzalloc(sizeof(*chip), GFP_KERNEL);
-       devname = kmalloc(DEVNAME_SIZE, GFP_KERNEL);
-
-       if (chip == NULL || devname == NULL)
-               goto out_free;
-
-       mutex_init(&chip->buffer_mutex);
-       mutex_init(&chip->tpm_mutex);
-       INIT_LIST_HEAD(&chip->list);
-
-       INIT_WORK(&chip->work, timeout_work);
-
-       setup_timer(&chip->user_read_timer, user_reader_timeout,
-                       (unsigned long)chip);
-
-       memcpy(&chip->vendor, entry, sizeof(struct tpm_vendor_specific));
-
-       chip->dev_num = find_first_zero_bit(dev_mask, TPM_NUM_DEVICES);
-
-       if (chip->dev_num >= TPM_NUM_DEVICES) {
-               dev_err(dev, "No available tpm device numbers\n");
-               goto out_free;
-       } else if (chip->dev_num == 0)
-               chip->vendor.miscdev.minor = TPM_MINOR;
-       else
-               chip->vendor.miscdev.minor = MISC_DYNAMIC_MINOR;
-
-       set_bit(chip->dev_num, dev_mask);
-
-       scnprintf(devname, DEVNAME_SIZE, "%s%d", "tpm", chip->dev_num);
-       chip->vendor.miscdev.name = devname;
-
-       chip->vendor.miscdev.parent = dev;
-       chip->dev = get_device(dev);
-       chip->release = dev->release;
-       dev->release = tpm_dev_release;
-       dev_set_drvdata(dev, chip);
-
-       if (misc_register(&chip->vendor.miscdev)) {
-               dev_err(chip->dev,
-                       "unable to misc_register %s, minor %d\n",
-                       chip->vendor.miscdev.name,
-                       chip->vendor.miscdev.minor);
-               goto put_device;
-       }
-
-       if (sysfs_create_group(&dev->kobj, chip->vendor.attr_group)) {
-               misc_deregister(&chip->vendor.miscdev);
-               goto put_device;
-       }
-
-       if (tpm_add_ppi(&dev->kobj)) {
-               misc_deregister(&chip->vendor.miscdev);
-               goto put_device;
-       }
-
-       chip->bios_dir = tpm_bios_log_setup(devname);
-
-       /* Make chip available */
-       spin_lock(&driver_lock);
-       list_add_rcu(&chip->list, &tpm_chip_list);
-       spin_unlock(&driver_lock);
-
-       return chip;
-
-put_device:
-       put_device(chip->dev);
-out_free:
-       kfree(chip);
-       kfree(devname);
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(tpm_register_hardware);
-
-MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
-MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
-MODULE_LICENSE("GPL");
index a7bfc176ed4316bc8319220f1b73848bc9d74f06..f32847872193ad7acdf67bef8921c9f48d474854 100644 (file)
@@ -59,8 +59,6 @@ extern ssize_t tpm_show_pcrs(struct device *, struct device_attribute *attr,
                                char *);
 extern ssize_t tpm_show_caps(struct device *, struct device_attribute *attr,
                                char *);
-extern ssize_t tpm_show_caps_1_2(struct device *, struct device_attribute *attr,
-                               char *);
 extern ssize_t tpm_store_cancel(struct device *, struct device_attribute *attr,
                                const char *, size_t);
 extern ssize_t tpm_show_enabled(struct device *, struct device_attribute *attr,
@@ -122,6 +120,7 @@ struct tpm_chip {
        struct device *dev;     /* Device stuff */
 
        int dev_num;            /* /dev/tpm# */
+       char devname[7];
        unsigned long is_open;  /* only one allowed */
        int time_expired;
 
index 99d6820c611db2e0f0f344c9cf701cfc02b0505d..c9a528d25d22001141b5809fa255fbc3d1d42154 100644 (file)
@@ -202,7 +202,7 @@ static int __init init_atmel(void)
 
        have_region =
            (atmel_request_region
-            (tpm_atmel.base, region_size, "tpm_atmel0") == NULL) ? 0 : 1;
+            (base, region_size, "tpm_atmel0") == NULL) ? 0 : 1;
 
        pdev = platform_device_register_simple("tpm_atmel", -1, NULL, 0);
        if (IS_ERR(pdev)) {
index 84ddc557b8f8689cd1af8e04ed825e42283fd442..59f7cb28260b4f4c23af010f7615befdd6ed7d23 100644 (file)
@@ -406,7 +406,6 @@ out_tpm:
 out:
        return NULL;
 }
-EXPORT_SYMBOL_GPL(tpm_bios_log_setup);
 
 void tpm_bios_log_teardown(struct dentry **lst)
 {
@@ -415,5 +414,3 @@ void tpm_bios_log_teardown(struct dentry **lst)
        for (i = 0; i < 3; i++)
                securityfs_remove(lst[i]);
 }
-EXPORT_SYMBOL_GPL(tpm_bios_log_teardown);
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c
new file mode 100644 (file)
index 0000000..c3cd7fe
--- /dev/null
@@ -0,0 +1,284 @@
+/*
+ * ATMEL I2C TPM AT97SC3204T
+ *
+ * Copyright (C) 2012 V Lab Technologies
+ *  Teddy Reed <teddy@prosauce.org>
+ * Copyright (C) 2013, Obsidian Research Corp.
+ *  Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+ * Device driver for ATMEL I2C TPMs.
+ *
+ * Teddy Reed determined the basic I2C command flow, unlike other I2C TPM
+ * devices the raw TCG formatted TPM command data is written via I2C and then
+ * raw TCG formatted TPM command data is returned via I2C.
+ *
+ * TCG status/locality/etc functions seen in the LPC implementation do not
+ * seem to be present.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include "tpm.h"
+
+#define I2C_DRIVER_NAME "tpm_i2c_atmel"
+
+#define TPM_I2C_SHORT_TIMEOUT  750     /* ms */
+#define TPM_I2C_LONG_TIMEOUT   2000    /* 2 sec */
+
+#define ATMEL_STS_OK 1
+
+/* Per-chip driver state: holds the response bytes captured by the most
+ * recent status poll so the receive callback can hand them out later. */
+struct priv_data {
+       /* Number of valid bytes in buffer; 0 means nothing is buffered */
+       size_t len;
+       /* This is the amount we read on the first try. 25 was chosen to fit a
+        * fair number of read responses in the buffer so a 2nd retry can be
+        * avoided in small message cases. */
+       u8 buffer[sizeof(struct tpm_output_header) + 25];
+};
+
+/* Write one raw TPM command to the device over I2C.
+ *
+ * Any response buffered by an earlier status poll is dropped first.
+ * Commands of two bytes or fewer are rejected with -EIO without touching
+ * the bus; otherwise the result of i2c_master_send() is returned as is. */
+static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+       struct i2c_client *i2c = to_i2c_client(chip->dev);
+       struct priv_data *state = chip->vendor.priv;
+       s32 sent;
+
+       /* A new command invalidates whatever response was buffered */
+       state->len = 0;
+
+       if (len < 3)
+               return -EIO;
+
+       sent = i2c_master_send(i2c, buf, len);
+
+       /* The hex dump of the command is capped at 64 bytes */
+       dev_dbg(chip->dev,
+               "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__,
+               (int)min_t(size_t, 64, len), buf, len, sent);
+       return sent;
+}
+
+/* Hand the TPM's response to the caller.
+ *
+ * i2c_atmel_read_status() has already read the start of the response into
+ * priv->buffer. If the length field of the response header shows that the
+ * whole message is in that buffer, it is copied out without further bus
+ * traffic; otherwise the complete message is read again from the device
+ * straight into buf.
+ *
+ * Returns the number of bytes copied on the buffered path, the result of
+ * i2c_master_recv() on the re-read path, -EIO if no response has been
+ * buffered, or -ENOMEM if the response does not fit in count bytes. */
+static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+       struct priv_data *priv = chip->vendor.priv;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       struct tpm_output_header *hdr =
+               (struct tpm_output_header *)priv->buffer;
+       u32 expected_len;
+       int rc;
+
+       if (priv->len == 0)
+               return -EIO;
+
+       /* Get the message size from the message header, if we didn't get the
+        * whole message in read_status then we need to re-read the
+        * message. */
+       expected_len = be32_to_cpu(hdr->length);
+       if (expected_len > count)
+               return -ENOMEM;
+
+       if (priv->len >= expected_len) {
+               /* Fill buf before logging so the dump shows the response
+                * rather than whatever the caller's buffer held before. */
+               memcpy(buf, priv->buffer, expected_len);
+               dev_dbg(chip->dev,
+                       "%s early(buf=%*ph count=%0zx) -> ret=%u\n", __func__,
+                       (int)min_t(size_t, 64, expected_len), buf, count,
+                       expected_len);
+               return expected_len;
+       }
+
+       rc = i2c_master_recv(client, buf, expected_len);
+       /* Log the actual transfer result, not the length that was requested,
+        * so I2C failures are visible in the trace. */
+       dev_dbg(chip->dev,
+               "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__,
+               (int)min_t(size_t, 64, expected_len), buf, count,
+               rc);
+       return rc;
+}
+
+/* Cancel callback: cancellation is not supported by this driver, so the
+ * request is only reported in the log. */
+static void i2c_atmel_cancel(struct tpm_chip *chip)
+{
+       /* Terminate the message with a newline so it is emitted as its own
+        * log line instead of being merged with the next one. */
+       dev_err(chip->dev,
+               "TPM operation cancellation was requested, but is not supported\n");
+}
+
+/* Status callback used by the common TPM code to poll for completion.
+ *
+ * Tries to read the start of the response into priv->buffer. Returns
+ * ATMEL_STS_OK and records the byte count in priv->len when the read
+ * returned data, or 0 (with priv->len left at 0) when the read failed or
+ * returned nothing. */
+static u8 i2c_atmel_read_status(struct tpm_chip *chip)
+{
+       struct priv_data *priv = chip->vendor.priv;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       int rc;
+
+       /* The TPM fails the I2C read until it is ready, so we do the entire
+        * transfer here and buffer it locally. This way the common code can
+        * properly handle the timeouts. */
+       priv->len = 0;
+       memset(priv->buffer, 0, sizeof(priv->buffer));
+
+       /* Once the TPM has completed the command the command remains readable
+        * until another command is issued. */
+       rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer));
+       /* Newline-terminated so the trace is not merged with the next line */
+       dev_dbg(chip->dev,
+               "%s: sts=%d\n", __func__, rc);
+       if (rc <= 0)
+               return 0;
+
+       priv->len = rc;
+
+       return ATMEL_STS_OK;
+}
+
+/* File operations installed as the misc device fops through the vendor
+ * structure below. Open, read, write and release are the tpm_*() handlers
+ * defined outside this file; seeking is disabled via no_llseek. */
+static const struct file_operations i2c_atmel_ops = {
+       .owner = THIS_MODULE,
+       .llseek = no_llseek,
+       .open = tpm_open,
+       .read = tpm_read,
+       .write = tpm_write,
+       .release = tpm_release,
+};
+
+/* sysfs attributes for this device. All handlers are tpm_show_*() and
+ * tpm_store_cancel(), which are defined outside this file. Every attribute
+ * is read-only except "cancel", which has no show handler and is writable
+ * by owner and group only. */
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+
+/* NULL-terminated list of the attributes declared above */
+static struct attribute *i2c_atmel_attrs[] = {
+       &dev_attr_pubek.attr,
+       &dev_attr_pcrs.attr,
+       &dev_attr_enabled.attr,
+       &dev_attr_active.attr,
+       &dev_attr_owned.attr,
+       &dev_attr_temp_deactivated.attr,
+       &dev_attr_caps.attr,
+       &dev_attr_cancel.attr,
+       &dev_attr_durations.attr,
+       &dev_attr_timeouts.attr,
+       NULL,
+};
+
+/* Attribute group referenced by the vendor structure (.attr_group) */
+static struct attribute_group i2c_atmel_attr_grp = {
+       .attrs = i2c_atmel_attrs
+};
+
+/* Cancellation check callback: always reports that the request was not
+ * canceled, regardless of the status value passed in. */
+static bool i2c_atmel_req_canceled(struct tpm_chip *chip, u8 status)
+{
+       return false;
+}
+
+/* Vendor callbacks and settings passed to tpm_register_hardware() by
+ * i2c_atmel_probe(). ATMEL_STS_OK serves as both the completion mask and
+ * the expected value; it is what i2c_atmel_read_status() returns once
+ * response data has been read. */
+static const struct tpm_vendor_specific i2c_atmel = {
+       .status = i2c_atmel_read_status,
+       .recv = i2c_atmel_recv,
+       .send = i2c_atmel_send,
+       .cancel = i2c_atmel_cancel,
+       .req_complete_mask = ATMEL_STS_OK,
+       .req_complete_val = ATMEL_STS_OK,
+       .req_canceled = i2c_atmel_req_canceled,
+       .attr_group = &i2c_atmel_attr_grp,
+       .miscdev.fops = &i2c_atmel_ops,
+};
+
+/* Bind the driver to an I2C client.
+ *
+ * Requires plain I2C transfer support from the adapter, allocates the
+ * per-chip state, registers the chip with the TPM core, installs default
+ * timeouts and then runs tpm_get_timeouts() and tpm_do_selftest().
+ *
+ * Returns 0 on success, -ENOMEM if the per-chip state cannot be allocated,
+ * or -ENODEV if the adapter is unsuitable, registration fails, or either
+ * of the two TPM start-up calls fails. */
+static int i2c_atmel_probe(struct i2c_client *client,
+                          const struct i2c_device_id *id)
+{
+       int rc;
+       struct tpm_chip *chip;
+       struct priv_data *priv;
+       struct device *dev = &client->dev;
+
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+               return -ENODEV;
+
+       /* Allocate before registering so that a failure needs no teardown.
+        * The send and status callbacks dereference this pointer
+        * unconditionally, so it must never be left NULL. */
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       chip = tpm_register_hardware(dev, &i2c_atmel);
+       if (!chip) {
+               dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
+               return -ENODEV;
+       }
+
+       chip->vendor.priv = priv;
+
+       /* Default timeouts */
+       chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT);
+       chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.irq = 0;
+
+       /* There is no known way to probe for this device, and all version
+        * information seems to be read via TPM commands. Thus we rely on the
+        * TPM startup process in the common code to detect the device. */
+       if (tpm_get_timeouts(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       if (tpm_do_selftest(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       return 0;
+
+out_err:
+       tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(chip->dev);
+       return rc;
+}
+
+/* Tear down on unbind: calls tpm_dev_vendor_release() when a chip is
+ * attached to the device, then tpm_remove_hardware(), then frees the chip
+ * structure. Always returns 0. */
+static int i2c_atmel_remove(struct i2c_client *client)
+{
+       struct device *dev = &(client->dev);
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip)
+               tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(dev);
+       /* NOTE(review): confirm the TPM core does not also free the chip
+        * from its device release hook; if it does, this is a double free. */
+       kfree(chip);
+       return 0;
+}
+
+/* I2C device-id table; the single entry uses the driver name */
+static const struct i2c_device_id i2c_atmel_id[] = {
+       {I2C_DRIVER_NAME, 0},
+       {}
+};
+MODULE_DEVICE_TABLE(i2c, i2c_atmel_id);
+
+#ifdef CONFIG_OF
+/* Device-tree match table, only built when CONFIG_OF is set */
+static const struct of_device_id i2c_atmel_of_match[] = {
+       {.compatible = "atmel,at97sc3204t"},
+       {},
+};
+MODULE_DEVICE_TABLE(of, i2c_atmel_of_match);
+#endif
+
+/* Suspend and resume are delegated to tpm_pm_suspend()/tpm_pm_resume() */
+static SIMPLE_DEV_PM_OPS(i2c_atmel_pm_ops, tpm_pm_suspend, tpm_pm_resume);
+
+/* I2C driver definition tying together the id tables, the probe/remove
+ * callbacks and the PM operations declared above. */
+static struct i2c_driver i2c_atmel_driver = {
+       .id_table = i2c_atmel_id,
+       .probe = i2c_atmel_probe,
+       .remove = i2c_atmel_remove,
+       .driver = {
+               .name = I2C_DRIVER_NAME,
+               .owner = THIS_MODULE,
+               .pm = &i2c_atmel_pm_ops,
+               .of_match_table = of_match_ptr(i2c_atmel_of_match),
+       },
+};
+
+module_i2c_driver(i2c_atmel_driver);
+
+MODULE_AUTHOR("Jason Gunthorpe <jgunthorpe@obsidianresearch.com>");
+MODULE_DESCRIPTION("Atmel TPM I2C Driver");
+MODULE_LICENSE("GPL");
index b8735de8ce956a3d19fd3ac5b9a5df77c57d3a22..fefd2aa5c81e4aaa6f9069eb26000f9152acface 100644 (file)
@@ -581,7 +581,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
 static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
 static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
 static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
 static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
 static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
@@ -685,7 +685,6 @@ out_vendor:
        chip->dev->release = NULL;
        chip->release = NULL;
        tpm_dev.client = NULL;
-       dev_set_drvdata(chip->dev, chip);
 out_err:
        return rc;
 }
@@ -766,7 +765,6 @@ static int tpm_tis_i2c_remove(struct i2c_client *client)
        chip->dev->release = NULL;
        chip->release = NULL;
        tpm_dev.client = NULL;
-       dev_set_drvdata(chip->dev, chip);
 
        return 0;
 }
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
new file mode 100644 (file)
index 0000000..6276fea
--- /dev/null
@@ -0,0 +1,710 @@
+/******************************************************************************
+ * Nuvoton TPM I2C Device Driver Interface for WPCT301/NPCT501,
+ * based on the TCG TPM Interface Spec version 1.2.
+ * Specifications at www.trustedcomputinggroup.org
+ *
+ * Copyright (C) 2011, Nuvoton Technology Corporation.
+ *  Dan Morav <dan.morav@nuvoton.com>
+ * Copyright (C) 2013, Obsidian Research Corp.
+ *  Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Nuvoton contact information: APC.Support@nuvoton.com
+ *****************************************************************************/
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/i2c.h>
+#include "tpm.h"
+
+/* I2C interface offsets */
+#define TPM_STS                0x00
+#define TPM_BURST_COUNT        0x01
+#define TPM_DATA_FIFO_W        0x20
+#define TPM_DATA_FIFO_R        0x40
+#define TPM_VID_DID_RID        0x60
+/* TPM command header size */
+#define TPM_HEADER_SIZE        10
+#define TPM_RETRY      5
+/*
+ * I2C bus device maximum buffer size w/o counting I2C address or command
+ * i.e. max size required for I2C write is 34 = addr, command, 32 bytes data
+ */
+#define TPM_I2C_MAX_BUF_SIZE           32
+#define TPM_I2C_RETRY_COUNT            32
+#define TPM_I2C_BUS_DELAY              1       /* msec */
+#define TPM_I2C_RETRY_DELAY_SHORT      2       /* msec */
+#define TPM_I2C_RETRY_DELAY_LONG       10      /* msec */
+
+#define I2C_DRIVER_NAME "tpm_i2c_nuvoton"
+
+struct priv_data {
+       unsigned int intrs;     /* IRQ count; waiters sleep until it changes */
+};
+
+/*
+ * Read @size bytes starting at register @offset into @data.
+ *
+ * Returns the result of i2c_smbus_read_i2c_block_data() unchanged: negative
+ * on failure, otherwise the number of bytes read.
+ *
+ * The debug trace only dumps the bytes the transfer reported as read, so a
+ * failed or short read never formats stale contents of the caller's buffer.
+ */
+static s32 i2c_nuvoton_read_buf(struct i2c_client *client, u8 offset, u8 size,
+                               u8 *data)
+{
+       s32 status;
+
+       status = i2c_smbus_read_i2c_block_data(client, offset, size, data);
+       dev_dbg(&client->dev,
+               "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__,
+               offset, size, status > 0 ? (int)status : 0, data, status);
+       return status;
+}
+
+/* Write @size bytes from @data to the chip starting at register @offset and
+ * trace the transfer; the SMBus block-write status is returned unchanged. */
+static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size,
+                                u8 *data)
+{
+       const s32 rc = i2c_smbus_write_i2c_block_data(client, offset, size,
+                                                     data);
+
+       dev_dbg(&client->dev,
+               "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__,
+               offset, size, (int)size, data, rc);
+       return rc;
+}
+
+#define TPM_STS_VALID          0x80
+#define TPM_STS_COMMAND_READY  0x40
+#define TPM_STS_GO             0x20
+#define TPM_STS_DATA_AVAIL     0x10
+#define TPM_STS_EXPECT         0x08
+#define TPM_STS_RESPONSE_RETRY 0x02
+#define TPM_STS_ERR_VAL        0x07    /* bit2...bit0 reads always 0 */
+
+#define TPM_I2C_SHORT_TIMEOUT  750     /* ms */
+#define TPM_I2C_LONG_TIMEOUT   2000    /* 2 sec */
+
+/*
+ * Fetch the one-byte TPM_STS register.
+ * If the transfer fails or returns no data, log it and hand back
+ * TPM_STS_ERR_VAL instead of a register value.
+ */
+static u8 i2c_nuvoton_read_status(struct tpm_chip *chip)
+{
+       u8 sts;
+       s32 rc = i2c_nuvoton_read_buf(to_i2c_client(chip->dev), TPM_STS, 1,
+                                     &sts);
+
+       if (rc > 0)
+               return sts;
+
+       dev_err(chip->dev, "%s() error return %d\n", __func__, rc);
+       return TPM_STS_ERR_VAL;
+}
+
+/*
+ * Write one byte to the TPM_STS register.
+ * The write is attempted up to TPM_I2C_RETRY_COUNT times, stopping after the
+ * first attempt that does not fail, with a TPM_I2C_BUS_DELAY pause after
+ * every attempt. Returns the status of the last attempt.
+ */
+static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data)
+{
+       s32 rc = -1;
+       int attempt = 0;
+
+       while (attempt < TPM_I2C_RETRY_COUNT && rc < 0) {
+               rc = i2c_nuvoton_write_buf(client, TPM_STS, 1, &data);
+               msleep(TPM_I2C_BUS_DELAY);
+               attempt++;
+       }
+       return rc;
+}
+
+/* Write commandReady to TPM_STS; this causes the current command to be
+ * aborted. A failed write is only logged. */
+static void i2c_nuvoton_ready(struct tpm_chip *chip)
+{
+       if (i2c_nuvoton_write_status(to_i2c_client(chip->dev),
+                                    TPM_STS_COMMAND_READY) >= 0)
+               return;
+
+       dev_err(chip->dev,
+               "%s() fail to write TPM_STS.commandReady\n", __func__);
+}
+
+/*
+ * Poll the one-byte burstCount register until it reads non-zero or
+ * chip->vendor.timeout_d jiffies have elapsed.
+ * Returns the count, capped at TPM_I2C_MAX_BUF_SIZE, or -1 if no non-zero
+ * value was read in time.
+ */
+static int i2c_nuvoton_get_burstcount(struct i2c_client *client,
+                                     struct tpm_chip *chip)
+{
+       const unsigned long deadline = jiffies + chip->vendor.timeout_d;
+       u8 raw;
+
+       do {
+               /* in I2C burstCount is 1 byte */
+               s32 rc = i2c_nuvoton_read_buf(client, TPM_BURST_COUNT, 1,
+                                             &raw);
+
+               if (rc > 0 && raw > 0)
+                       return min_t(u8, TPM_I2C_MAX_BUF_SIZE, raw);
+               msleep(TPM_I2C_BUS_DELAY);
+       } while (time_before(jiffies, deadline));
+
+       return -1;
+}
+
+/*
+ * Read TPM_STS once and report whether (status & mask) == value. A failed
+ * read (TPM_STS_ERR_VAL) never matches.
+ *
+ * Note for callers of i2c_nuvoton_wait_for_stat(): WPCT301/NPCT501 SINT#
+ * supports only dataAvail, so any call which is not waiting for dataAvail
+ * sets queue to NULL to avoid waiting for an interrupt.
+ */
+static bool i2c_nuvoton_check_status(struct tpm_chip *chip, u8 mask, u8 value)
+{
+       u8 status = i2c_nuvoton_read_status(chip);
+       return (status != TPM_STS_ERR_VAL) && ((status & mask) == value);
+}
+
+/*
+ * Wait until (TPM_STS & @mask) == @value, or until @timeout jiffies pass.
+ *
+ * With an IRQ configured and a non-NULL @queue the function sleeps on @queue
+ * until the interrupt counter in priv changes; @mask and @value are not
+ * examined on that path. Otherwise the status register is polled.
+ *
+ * Returns 0 on success and -ETIMEDOUT otherwise.
+ */
+static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value,
+                                    u32 timeout, wait_queue_head_t *queue)
+{
+       if (chip->vendor.irq && queue) {
+               s32 rc;
+               DEFINE_WAIT(wait);
+               struct priv_data *priv = chip->vendor.priv;
+               unsigned int cur_intrs = priv->intrs;
+
+               enable_irq(chip->vendor.irq);   /* the handler disables it again */
+               rc = wait_event_interruptible_timeout(*queue,
+                                                     cur_intrs != priv->intrs,
+                                                     timeout);
+               /* rc > 0: the handler ran, so we know that the SINT pin is
+                * asserted and we do not need to do i2c_nuvoton_check_status */
+               if (rc > 0)
+                       return 0;
+               /* NOTE(review): any other rc (presumably timeout or signal)
+                * falls through to the -ETIMEDOUT report below, and the IRQ
+                * appears to stay enabled in that case - confirm the next
+                * enable_irq() is still balanced. */
+       } else {
+               unsigned long ten_msec, stop;
+               bool status_valid;
+
+               /* check current status */
+               status_valid = i2c_nuvoton_check_status(chip, mask, value);
+               if (status_valid)
+                       return 0;
+
+               /* use polling to wait for the event: short sleeps for the
+                * first TPM_I2C_RETRY_DELAY_LONG ms, long sleeps afterwards */
+               ten_msec = jiffies + msecs_to_jiffies(TPM_I2C_RETRY_DELAY_LONG);
+               stop = jiffies + timeout;
+               do {
+                       if (time_before(jiffies, ten_msec))
+                               msleep(TPM_I2C_RETRY_DELAY_SHORT);
+                       else
+                               msleep(TPM_I2C_RETRY_DELAY_LONG);
+                       status_valid = i2c_nuvoton_check_status(chip, mask,
+                                                               value);
+                       if (status_valid)
+                               return 0;
+               } while (time_before(jiffies, stop));
+       }
+       dev_err(chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask,
+               value);
+       return -ETIMEDOUT;
+}
+
+/* Block until TPM_STS reports both stsValid and dataAvail; @timeout and
+ * @queue are handed straight to i2c_nuvoton_wait_for_stat(). */
+static int i2c_nuvoton_wait_for_data_avail(struct tpm_chip *chip, u32 timeout,
+                                          wait_queue_head_t *queue)
+{
+       const u8 ready_bits = TPM_STS_DATA_AVAIL | TPM_STS_VALID;
+
+       return i2c_nuvoton_wait_for_stat(chip, ready_bits, ready_bits,
+                                        timeout, queue);
+}
+
+/*
+ * Read up to @count bytes from the TPM_DATA_FIFO_R register into @buf.
+ *
+ * Each pass waits for dataAvail, fetches burstCount and reads at most that
+ * many bytes. Returns the number of bytes stored (short if dataAvail did not
+ * show up within timeout_c) or -EIO when burstCount or the FIFO read fails.
+ */
+static int i2c_nuvoton_recv_data(struct i2c_client *client,
+                                struct tpm_chip *chip, u8 *buf, size_t count)
+{
+       int size = 0;
+
+       while (size < count) {
+               int burst_count, bytes2read;
+               s32 rc;
+
+               if (i2c_nuvoton_wait_for_data_avail(chip,
+                                                   chip->vendor.timeout_c,
+                                                   &chip->vendor.read_queue))
+                       break;
+
+               burst_count = i2c_nuvoton_get_burstcount(client, chip);
+               if (burst_count < 0) {
+                       dev_err(chip->dev,
+                               "%s() fail to read burstCount=%d\n", __func__,
+                               burst_count);
+                       return -EIO;
+               }
+
+               bytes2read = min_t(size_t, burst_count, count - size);
+               rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R,
+                                         bytes2read, &buf[size]);
+               if (rc < 0) {
+                       dev_err(chip->dev,
+                               "%s() fail on i2c_nuvoton_read_buf()=%d\n",
+                               __func__, rc);
+                       return -EIO;
+               }
+               dev_dbg(chip->dev, "%s(%d):", __func__, bytes2read);
+               size += bytes2read;
+       }
+
+       return size;
+}
+
+/*
+ * Read a TPM command result into @buf (capacity @count bytes).
+ *
+ * Up to TPM_RETRY attempts are made; every attempt after the first asks the
+ * chip to resend by writing responseRetry. An attempt reads one burst that
+ * must hold at least the TPM_HEADER_SIZE byte header, takes the total length
+ * from the big endian paramsize field at offset 2, reads the remainder and
+ * finally checks that the chip has no data left over.
+ *
+ * The first burst is capped at @count so it cannot run past @buf, and a
+ * paramsize smaller than what was already read is rejected instead of being
+ * turned into a huge unsigned remainder.
+ *
+ * Returns the number of bytes read, or the negative errno (-EIO or
+ * -ETIMEDOUT) of the last attempt. commandReady is written before returning
+ * in all cases.
+ */
+static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+       struct device *dev = chip->dev;
+       struct i2c_client *client = to_i2c_client(dev);
+       s32 rc;
+       int expected, status, burst_count, retries, size = 0;
+
+       if (count < TPM_HEADER_SIZE) {
+               i2c_nuvoton_ready(chip);    /* return to idle */
+               dev_err(dev, "%s() count < header size\n", __func__);
+               return -EIO;
+       }
+       for (retries = 0; retries < TPM_RETRY; retries++) {
+               if (retries > 0) {
+                       /* if this is not the first trial, set responseRetry */
+                       i2c_nuvoton_write_status(client,
+                                                TPM_STS_RESPONSE_RETRY);
+               }
+               /*
+                * read first available (> 10 bytes), including:
+                * tag, paramsize, and result
+                */
+               status = i2c_nuvoton_wait_for_data_avail(
+                       chip, chip->vendor.timeout_c, &chip->vendor.read_queue);
+               if (status != 0) {
+                       dev_err(dev, "%s() timeout on dataAvail\n", __func__);
+                       size = -ETIMEDOUT;
+                       continue;
+               }
+               burst_count = i2c_nuvoton_get_burstcount(client, chip);
+               if (burst_count < 0) {
+                       dev_err(dev, "%s() fail to get burstCount\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               /* never read more than the caller's buffer can hold */
+               size = i2c_nuvoton_recv_data(client, chip, buf,
+                                            min_t(size_t, burst_count,
+                                                  count));
+               if (size < TPM_HEADER_SIZE) {
+                       dev_err(dev, "%s() fail to read header\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               /*
+                * convert number of expected bytes field from big endian 32 bit
+                * to machine native
+                */
+               expected = be32_to_cpu(*(__be32 *) (buf + 2));
+               if (expected > count) {
+                       dev_err(dev, "%s() expected > count\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               /* "expected - size" below must not wrap around */
+               if (expected < size) {
+                       dev_err(dev, "%s() expected < size\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               rc = i2c_nuvoton_recv_data(client, chip, &buf[size],
+                                          expected - size);
+               size += rc;
+               if (rc < 0 || size < expected) {
+                       dev_err(dev, "%s() fail to read remainder of result\n",
+                               __func__);
+                       size = -EIO;
+                       continue;
+               }
+               /* stsValid set and dataAvail clear: nothing left to read */
+               if (i2c_nuvoton_wait_for_stat(
+                           chip, TPM_STS_VALID | TPM_STS_DATA_AVAIL,
+                           TPM_STS_VALID, chip->vendor.timeout_c,
+                           NULL)) {
+                       dev_err(dev, "%s() error left over data\n", __func__);
+                       size = -ETIMEDOUT;
+                       continue;
+               }
+               break;
+       }
+       i2c_nuvoton_ready(chip);
+       dev_dbg(chip->dev, "%s() -> %d\n", __func__, size);
+       return size;
+}
+
+/*
+ * Send TPM command.
+ *
+ * If interrupts are used (signaled by an irq set in the vendor structure)
+ * tpm.c can skip polling for the data to be available as the interrupt is
+ * waited for here
+ *
+ * The transfer is tried up to TPM_RETRY times. Every attempt starts by
+ * writing commandReady, which aborts the previous attempt, so the write
+ * position is rewound and the whole command is sent again. All bytes but the
+ * last are written in burstCount sized pieces while the chip keeps Expect
+ * set; the last byte is written on its own and must clear Expect. TPM_STS_GO
+ * is then written and the function waits for dataAvail for as long as
+ * tpm_calc_ordinal_duration() allows for the command's ordinal.
+ *
+ * Returns @len on success, -EINVAL if @len is shorter than a command header,
+ * or a negative errno if the transfer or the wait failed.
+ */
+static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+       struct device *dev = chip->dev;
+       struct i2c_client *client = to_i2c_client(dev);
+       u32 ordinal;
+       size_t count = 0;
+       int burst_count, bytes2write, retries, rc = -EIO;
+
+       /*
+        * "len - 1" below must not wrap around and the ordinal is read from
+        * bytes 6..9 of the header, so refuse anything shorter than a header.
+        */
+       if (len < TPM_HEADER_SIZE) {
+               dev_err(dev, "%s() len < header size\n", __func__);
+               return -EINVAL;
+       }
+
+       for (retries = 0; retries < TPM_RETRY; retries++) {
+               i2c_nuvoton_ready(chip);
+               /* the abort above discards what was sent so far: start over */
+               count = 0;
+               if (i2c_nuvoton_wait_for_stat(chip, TPM_STS_COMMAND_READY,
+                                             TPM_STS_COMMAND_READY,
+                                             chip->vendor.timeout_b, NULL)) {
+                       dev_err(dev, "%s() timeout on commandReady\n",
+                               __func__);
+                       rc = -EIO;
+                       continue;
+               }
+               rc = 0;
+               while (count < len - 1) {
+                       burst_count = i2c_nuvoton_get_burstcount(client,
+                                                                chip);
+                       if (burst_count < 0) {
+                               dev_err(dev, "%s() fail get burstCount\n",
+                                       __func__);
+                               rc = -EIO;
+                               break;
+                       }
+                       bytes2write = min_t(size_t, burst_count,
+                                           len - 1 - count);
+                       rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W,
+                                                  bytes2write, &buf[count]);
+                       if (rc < 0) {
+                               dev_err(dev, "%s() fail i2cWriteBuf\n",
+                                       __func__);
+                               break;
+                       }
+                       dev_dbg(dev, "%s(%d):", __func__, bytes2write);
+                       count += bytes2write;
+                       rc = i2c_nuvoton_wait_for_stat(chip,
+                                                      TPM_STS_VALID |
+                                                      TPM_STS_EXPECT,
+                                                      TPM_STS_VALID |
+                                                      TPM_STS_EXPECT,
+                                                      chip->vendor.timeout_c,
+                                                      NULL);
+                       if (rc < 0) {
+                               dev_err(dev, "%s() timeout on Expect\n",
+                                       __func__);
+                               rc = -ETIMEDOUT;
+                               break;
+                       }
+               }
+               if (rc < 0)
+                       continue;
+
+               /* write last byte */
+               rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W, 1,
+                                          &buf[count]);
+               if (rc < 0) {
+                       dev_err(dev, "%s() fail to write last byte\n",
+                               __func__);
+                       rc = -EIO;
+                       continue;
+               }
+               dev_dbg(dev, "%s(last): %02x", __func__, buf[count]);
+               rc = i2c_nuvoton_wait_for_stat(chip,
+                                              TPM_STS_VALID | TPM_STS_EXPECT,
+                                              TPM_STS_VALID,
+                                              chip->vendor.timeout_c, NULL);
+               if (rc) {
+                       dev_err(dev, "%s() timeout on Expect to clear\n",
+                               __func__);
+                       rc = -ETIMEDOUT;
+                       continue;
+               }
+               break;
+       }
+       if (rc < 0) {
+               /* retries == TPM_RETRY */
+               i2c_nuvoton_ready(chip);
+               return rc;
+       }
+       /* execute the TPM command */
+       rc = i2c_nuvoton_write_status(client, TPM_STS_GO);
+       if (rc < 0) {
+               dev_err(dev, "%s() fail to write Go\n", __func__);
+               i2c_nuvoton_ready(chip);
+               return rc;
+       }
+       ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
+       rc = i2c_nuvoton_wait_for_data_avail(chip,
+                                            tpm_calc_ordinal_duration(chip,
+                                                                      ordinal),
+                                            &chip->vendor.read_queue);
+       if (rc) {
+               dev_err(dev, "%s() timeout command duration\n", __func__);
+               i2c_nuvoton_ready(chip);
+               return rc;
+       }
+
+       dev_dbg(dev, "%s() -> %zd\n", __func__, len);
+       return len;
+}
+
+/* A request counts as canceled when the status byte is exactly commandReady
+ * with no other bit set. */
+static bool i2c_nuvoton_req_canceled(struct tpm_chip *chip, u8 status)
+{
+       const bool canceled = status == TPM_STS_COMMAND_READY;
+
+       return canceled;
+}
+
+static const struct file_operations i2c_nuvoton_ops = {        /* used as tpm_i2c.miscdev.fops */
+       .owner = THIS_MODULE,
+       .llseek = no_llseek,
+       .open = tpm_open,
+       .read = tpm_read,
+       .write = tpm_write,
+       .release = tpm_release,
+};
+
+/* sysfs attributes; all of them are collected in i2c_nuvoton_attr_grp, which
+ * tpm_i2c.attr_group points at. Only "cancel" is writable. */
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+
+static struct attribute *i2c_nuvoton_attrs[] = {
+       &dev_attr_pubek.attr,
+       &dev_attr_pcrs.attr,
+       &dev_attr_enabled.attr,
+       &dev_attr_active.attr,
+       &dev_attr_owned.attr,
+       &dev_attr_temp_deactivated.attr,
+       &dev_attr_caps.attr,
+       &dev_attr_cancel.attr,
+       &dev_attr_durations.attr,
+       &dev_attr_timeouts.attr,
+       NULL,   /* terminates the list */
+};
+
+static struct attribute_group i2c_nuvoton_attr_grp = {
+       .attrs = i2c_nuvoton_attrs
+};
+
+static const struct tpm_vendor_specific tpm_i2c = {    /* passed to tpm_register_hardware() */
+       .status = i2c_nuvoton_read_status,
+       .recv = i2c_nuvoton_recv,
+       .send = i2c_nuvoton_send,
+       .cancel = i2c_nuvoton_ready,    /* writes commandReady, aborting the command */
+       .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+       .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+       .req_canceled = i2c_nuvoton_req_canceled,
+       .attr_group = &i2c_nuvoton_attr_grp,
+       .miscdev.fops = &i2c_nuvoton_ops,
+};
+
+/*
+ * The only purpose for the handler is to signal to any waiting threads that
+ * the interrupt is currently being asserted. The driver does not do any
+ * processing triggered by interrupts, and the chip provides no way to mask at
+ * the source (plus that would be slow over I2C). Run the IRQ as a one-shot:
+ * the handler disables the line itself, which means it cannot be shared.
+ */
+static irqreturn_t i2c_nuvoton_int_handler(int dummy, void *dev_id)
+{
+       struct tpm_chip *chip = dev_id;
+       struct priv_data *priv = chip->vendor.priv;
+
+       priv->intrs++;  /* makes the waiter's "cur_intrs != priv->intrs" true */
+       wake_up(&chip->vendor.read_queue);
+       disable_irq_nosync(chip->vendor.irq);   /* re-enabled by the next waiter */
+       return IRQ_HANDLED;
+}
+
+/*
+ * Read the 4-byte VID/DID/RID register and check that it starts with the
+ * expected WPCT301 bytes (the RID byte is ignored). If the first read does
+ * not match, the ID is looked for once more at the FIFO_W offset.
+ *
+ * On success the raw register bytes are stored in @res and 0 is returned.
+ * Returns -ENODEV when the adapter lacks SMBus byte-data support or the ID
+ * does not match, or the negative error of a failed read.
+ */
+static int get_vid(struct i2c_client *client, u32 *res)
+{
+       static const u8 vid_did_rid_value[] = { 0x50, 0x10, 0xfe };
+       /* zeroed so a short read can never compare or return stack garbage */
+       u32 temp = 0;
+       s32 rc;
+
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+               return -ENODEV;
+       rc = i2c_nuvoton_read_buf(client, TPM_VID_DID_RID, 4, (u8 *)&temp);
+       if (rc < 0)
+               return rc;
+
+       /* check WPCT301 values - ignore RID */
+       if (memcmp(&temp, vid_did_rid_value, sizeof(vid_did_rid_value))) {
+               /*
+                * f/w rev 2.81 has an issue where the VID_DID_RID is not
+                * reporting the right value. so give it another chance at
+                * offset 0x20 (FIFO_W).
+                */
+               temp = 0;       /* do not mix bytes of the two reads */
+               rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_W, 4,
+                                         (u8 *) (&temp));
+               if (rc < 0)
+                       return rc;
+
+               /* check WPCT301 values - ignore RID */
+               if (memcmp(&temp, vid_did_rid_value,
+                          sizeof(vid_did_rid_value)))
+                       return -ENODEV;
+       }
+
+       *res = temp;
+       return 0;
+}
+
+/*
+ * I2C probe callback.
+ *
+ * Verifies the chip ID, registers the chip with the TPM core, allocates the
+ * driver private data, installs default timeouts and, when the client has an
+ * IRQ, requests it and brings the chip into a known state. Finally the
+ * timeouts are read from the chip and the self test is run.
+ *
+ * Returns 0 on success or a negative errno. After the chip has been
+ * registered every failure, including a failed private data allocation,
+ * unwinds through out_err.
+ */
+static int i2c_nuvoton_probe(struct i2c_client *client,
+                            const struct i2c_device_id *id)
+{
+       int rc;
+       struct tpm_chip *chip;
+       struct device *dev = &client->dev;
+       u32 vid = 0;
+
+       rc = get_vid(client, &vid);
+       if (rc)
+               return rc;
+
+       dev_info(dev, "VID: %04X DID: %02X RID: %02X\n", (u16) vid,
+                (u8) (vid >> 16), (u8) (vid >> 24));
+
+       chip = tpm_register_hardware(dev, &tpm_i2c);
+       if (!chip) {
+               dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
+               return -ENODEV;
+       }
+
+       chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data),
+                                        GFP_KERNEL);
+       /* priv is dereferenced by the IRQ handler and by the IRQ wait path */
+       if (!chip->vendor.priv) {
+               rc = -ENOMEM;
+               goto out_err;
+       }
+       init_waitqueue_head(&chip->vendor.read_queue);
+       init_waitqueue_head(&chip->vendor.int_queue);
+
+       /* Default timeouts */
+       chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT);
+       chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+
+       /*
+        * I2C intfcaps (interrupt capabilities) in the chip are hard coded to:
+        *   TPM_INTF_INT_LEVEL_LOW | TPM_INTF_DATA_AVAIL_INT
+        * The IRQ should be set in the i2c_board_info (which is done
+        * automatically in of_i2c_register_devices, for device tree users)
+        */
+       chip->vendor.irq = client->irq;
+
+       if (chip->vendor.irq) {
+               dev_dbg(dev, "%s() chip-vendor.irq\n", __func__);
+               rc = devm_request_irq(dev, chip->vendor.irq,
+                                     i2c_nuvoton_int_handler,
+                                     IRQF_TRIGGER_LOW,
+                                     chip->vendor.miscdev.name,
+                                     chip);
+               if (rc) {
+                       /* carry on without an IRQ: waits fall back to polling */
+                       dev_err(dev, "%s() Unable to request irq: %d for use\n",
+                               __func__, chip->vendor.irq);
+                       chip->vendor.irq = 0;
+               } else {
+                       /* Clear any pending interrupt */
+                       i2c_nuvoton_ready(chip);
+                       /* - wait for TPM_STS==0xA0 (stsValid, commandReady) */
+                       rc = i2c_nuvoton_wait_for_stat(chip,
+                                                      TPM_STS_COMMAND_READY,
+                                                      TPM_STS_COMMAND_READY,
+                                                      chip->vendor.timeout_b,
+                                                      NULL);
+                       if (rc == 0) {
+                               /*
+                                * TIS is in ready state
+                                * write dummy byte to enter reception state
+                                * TPM_DATA_FIFO_W <- rc (0)
+                                */
+                               rc = i2c_nuvoton_write_buf(client,
+                                                          TPM_DATA_FIFO_W,
+                                                          1, (u8 *) (&rc));
+                               if (rc < 0)
+                                       goto out_err;
+                               /* TPM_STS <- 0x40 (commandReady) */
+                               i2c_nuvoton_ready(chip);
+                       } else {
+                               /*
+                                * timeout_b reached - command was
+                                * aborted. TIS should now be in idle state -
+                                * only TPM_STS_VALID should be set
+                                */
+                               if (i2c_nuvoton_read_status(chip) !=
+                                   TPM_STS_VALID) {
+                                       rc = -EIO;
+                                       goto out_err;
+                               }
+                       }
+               }
+       }
+
+       if (tpm_get_timeouts(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       if (tpm_do_selftest(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       return 0;
+
+out_err:
+       tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(chip->dev);
+       return rc;
+}
+
+/*
+ * I2C remove callback: calls tpm_dev_vendor_release() on the chip stored in
+ * the device's drvdata (if any), then tpm_remove_hardware() and kfree().
+ * Always returns 0.
+ *
+ * NOTE(review): tpm_remove_hardware() is called even when drvdata is NULL,
+ * and chip is kfree()d here afterwards - confirm that neither
+ * tpm_remove_hardware() nor the device release path frees or uses chip too.
+ */
+static int i2c_nuvoton_remove(struct i2c_client *client)
+{
+       struct device *dev = &(client->dev);
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip)
+               tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(dev);
+       kfree(chip);
+       return 0;
+}
+
+
+static const struct i2c_device_id i2c_nuvoton_id[] = {
+       {I2C_DRIVER_NAME, 0},   /* the probe routine does not use the id entry */
+       {}
+};
+MODULE_DEVICE_TABLE(i2c, i2c_nuvoton_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_nuvoton_of_match[] = {    /* device tree matches */
+       {.compatible = "nuvoton,npct501"},
+       {.compatible = "winbond,wpct301"},
+       {},
+};
+MODULE_DEVICE_TABLE(of, i2c_nuvoton_of_match);
+#endif
+
+static SIMPLE_DEV_PM_OPS(i2c_nuvoton_pm_ops, tpm_pm_suspend, tpm_pm_resume);
+
+static struct i2c_driver i2c_nuvoton_driver = {        /* registered via module_i2c_driver() */
+       .id_table = i2c_nuvoton_id,
+       .probe = i2c_nuvoton_probe,
+       .remove = i2c_nuvoton_remove,
+       .driver = {
+               .name = I2C_DRIVER_NAME,
+               .owner = THIS_MODULE,
+               .pm = &i2c_nuvoton_pm_ops,      /* tpm_pm_suspend / tpm_pm_resume */
+               .of_match_table = of_match_ptr(i2c_nuvoton_of_match),
+       },
+};
+
+module_i2c_driver(i2c_nuvoton_driver);
+
+MODULE_AUTHOR("Dan Morav (dan.morav@nuvoton.com)");
+MODULE_DESCRIPTION("Nuvoton TPM I2C Driver");
+MODULE_LICENSE("GPL");
index 5bb8e2ddd3b3b9b3a51557a605e8675a8f5c3e64..a0d6ceb5d00551b2c10f4341d1b5b5bd2d812e0b 100644 (file)
@@ -584,7 +584,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
 static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
 static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
 static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
 
 static struct attribute *stm_tpm_attrs[] = {
@@ -746,8 +746,6 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
        tpm_get_timeouts(chip);
 
-       i2c_set_clientdata(client, chip);
-
        dev_info(chip->dev, "TPM I2C Initialized\n");
        return 0;
 _irq_set:
@@ -807,24 +805,18 @@ static int tpm_st33_i2c_remove(struct i2c_client *client)
 #ifdef CONFIG_PM_SLEEP
 /*
  * tpm_st33_i2c_pm_suspend suspend the TPM device
- * Added: Work around when suspend and no tpm application is running, suspend
- * may fail because chip->data_buffer is not set (only set in tpm_open in Linux
- * TPM core)
  * @param: client, the i2c_client drescription (TPM I2C description).
  * @param: mesg, the power management message.
  * @return: 0 in case of success.
  */
 static int tpm_st33_i2c_pm_suspend(struct device *dev)
 {
-       struct tpm_chip *chip = dev_get_drvdata(dev);
        struct st33zp24_platform_data *pin_infos = dev->platform_data;
        int ret = 0;
 
        if (power_mgt) {
                gpio_set_value(pin_infos->io_lpcpd, 0);
        } else {
-               if (chip->data_buffer == NULL)
-                       chip->data_buffer = pin_infos->tpm_i2c_buffer[0];
                ret = tpm_pm_suspend(dev);
        }
        return ret;
@@ -849,8 +841,6 @@ static int tpm_st33_i2c_pm_resume(struct device *dev)
                                          TPM_STS_VALID) == TPM_STS_VALID,
                                          chip->vendor.timeout_b);
        } else {
-               if (chip->data_buffer == NULL)
-                       chip->data_buffer = pin_infos->tpm_i2c_buffer[0];
                ret = tpm_pm_resume(dev);
                if (!ret)
                        tpm_do_selftest(chip);
index 56b07c35a13e173bf1cc5d10321d30d5a11f259e..2783a42aa73295406bee2ff845f5ae08a2d9c738 100644 (file)
@@ -98,7 +98,7 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 
        if (count < len) {
                dev_err(ibmvtpm->dev,
-                       "Invalid size in recv: count=%ld, crq_size=%d\n",
+                       "Invalid size in recv: count=%zd, crq_size=%d\n",
                        count, len);
                return -EIO;
        }
@@ -136,7 +136,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 
        if (count > ibmvtpm->rtce_size) {
                dev_err(ibmvtpm->dev,
-                       "Invalid size in send: count=%ld, rtce_size=%d\n",
+                       "Invalid size in send: count=%zd, rtce_size=%d\n",
                        count, ibmvtpm->rtce_size);
                return -EIO;
        }
@@ -419,7 +419,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
 static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
 static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
                   NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
 static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
 static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
index 2168d15bc728e22c99a61976de5050c26dc33cbe..8e562dc656016cd9c4dbeaaa3dac0a96a02d0f26 100644 (file)
@@ -452,12 +452,8 @@ int tpm_add_ppi(struct kobject *parent)
 {
        return sysfs_create_group(parent, &ppi_attr_grp);
 }
-EXPORT_SYMBOL_GPL(tpm_add_ppi);
 
 void tpm_remove_ppi(struct kobject *parent)
 {
        sysfs_remove_group(parent, &ppi_attr_grp);
 }
-EXPORT_SYMBOL_GPL(tpm_remove_ppi);
-
-MODULE_LICENSE("GPL");
index 5796d0157ce0c3bbd82c662bf61f8035e9d9daf8..1b74459c072399109d1730a6d60471e64ffcc48b 100644 (file)
@@ -448,7 +448,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
 static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
 static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
                   NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
 static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
 static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
index 94c280d36e8b3bfaea00ee36c4fe3215818b3c86..c8ff4df81779f3b66a3e62abab055b4e404b81e7 100644 (file)
@@ -351,8 +351,6 @@ static int tpmfront_probe(struct xenbus_device *dev,
 
        tpm_get_timeouts(priv->chip);
 
-       dev_set_drvdata(&dev->dev, priv->chip);
-
        return rv;
 }
 
index 218460fcd2e4f9294d24b84680f173f838c11332..25a70d06c5bf243efe85ff3ed2dfbd2a7cc590df 100644 (file)
@@ -68,6 +68,9 @@ static void cs_check_cpu(int cpu, unsigned int load)
 
                dbs_info->requested_freq += get_freq_target(cs_tuners, policy);
 
+               if (dbs_info->requested_freq > policy->max)
+                       dbs_info->requested_freq = policy->max;
+
                __cpufreq_driver_target(policy, dbs_info->requested_freq,
                        CPUFREQ_RELATION_H);
                return;
index 0806c31e57645a287490c4d851d2760cd9484ca0..e6be63561fa699a28053c7528056dcd08ffb2285 100644 (file)
@@ -328,10 +328,6 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                             dbs_data->cdata->gov_dbs_timer);
                }
 
-               /*
-                * conservative does not implement micro like ondemand
-                * governor, thus we are bound to jiffes/HZ
-                */
                if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
                        cs_dbs_info->down_skip = 0;
                        cs_dbs_info->enable = 1;
index be6d14307aa81a7fa3e3626953b5a43b514933e2..a0acd0bfba40a361f3ea9eaafc42db928968dea7 100644 (file)
@@ -53,6 +53,7 @@ static unsigned int omap_getspeed(unsigned int cpu)
 
 static int omap_target(struct cpufreq_policy *policy, unsigned int index)
 {
+       int r, ret;
        struct dev_pm_opp *opp;
        unsigned long freq, volt = 0, volt_old = 0, tol = 0;
        unsigned int old_freq, new_freq;
index dd2874ec1927a6dae0e97defce4dfc05aef38d87..446687cc2334ed2f60a0b6f0170382fe733aae01 100644 (file)
@@ -89,14 +89,15 @@ config AT_HDMAC
          Support the Atmel AHB DMA controller.
 
 config FSL_DMA
-       tristate "Freescale Elo and Elo Plus DMA support"
+       tristate "Freescale Elo series DMA support"
        depends on FSL_SOC
        select DMA_ENGINE
        select ASYNC_TX_ENABLE_CHANNEL_SWITCH
        ---help---
-         Enable support for the Freescale Elo and Elo Plus DMA controllers.
-         The Elo is the DMA controller on some 82xx and 83xx parts, and the
-         Elo Plus is the DMA controller on 85xx and 86xx parts.
+         Enable support for the Freescale Elo series DMA controllers.
+         The Elo is the DMA controller on some mpc82xx and mpc83xx parts, the
+         EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on
+         some Txxx and Bxxx parts.
 
 config MPC512X_DMA
        tristate "Freescale MPC512x built-in DMA engine support"
index e51a9832ef0d06801fd151b376824a7ac3d697b7..16a2aa28f85672689f66c37039a8e72230e71b02 100644 (file)
@@ -1164,42 +1164,12 @@ static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
        kfree(txd);
 }
 
-static void pl08x_unmap_buffers(struct pl08x_txd *txd)
-{
-       struct device *dev = txd->vd.tx.chan->device->dev;
-       struct pl08x_sg *dsg;
-
-       if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (txd->vd.tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       list_for_each_entry(dsg, &txd->dsg_list, node)
-                               dma_unmap_single(dev, dsg->src_addr, dsg->len,
-                                               DMA_TO_DEVICE);
-               else {
-                       list_for_each_entry(dsg, &txd->dsg_list, node)
-                               dma_unmap_page(dev, dsg->src_addr, dsg->len,
-                                               DMA_TO_DEVICE);
-               }
-       }
-       if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (txd->vd.tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       list_for_each_entry(dsg, &txd->dsg_list, node)
-                               dma_unmap_single(dev, dsg->dst_addr, dsg->len,
-                                               DMA_FROM_DEVICE);
-               else
-                       list_for_each_entry(dsg, &txd->dsg_list, node)
-                               dma_unmap_page(dev, dsg->dst_addr, dsg->len,
-                                               DMA_FROM_DEVICE);
-       }
-}
-
 static void pl08x_desc_free(struct virt_dma_desc *vd)
 {
        struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
        struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan);
 
-       if (!plchan->slave)
-               pl08x_unmap_buffers(txd);
-
+       dma_descriptor_unmap(txd);
        if (!txd->done)
                pl08x_release_mux(plchan);
 
@@ -1252,7 +1222,7 @@ static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
        size_t bytes = 0;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        /*
@@ -1267,7 +1237,7 @@ static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
 
        spin_lock_irqsave(&plchan->vc.lock, flags);
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret != DMA_SUCCESS) {
+       if (ret != DMA_COMPLETE) {
                vd = vchan_find_desc(&plchan->vc, cookie);
                if (vd) {
                        /* On the issued list, so hasn't been processed yet */
@@ -2138,8 +2108,7 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
        writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
        writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
 
-       ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED,
-                         DRIVER_NAME, pl08x);
+       ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x);
        if (ret) {
                dev_err(&adev->dev, "%s failed to request interrupt %d\n",
                        __func__, adev->irq[0]);
index c787f38a186a008a6cf8fa4af1dc9d19cab8f836..e2c04dc81e2a903ea13956e872150c40ed8b00e8 100644 (file)
@@ -344,31 +344,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
        /* move myself to free_list */
        list_move(&desc->desc_node, &atchan->free_list);
 
-       /* unmap dma addresses (not on slave channels) */
-       if (!atchan->chan_common.private) {
-               struct device *parent = chan2parent(&atchan->chan_common);
-               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                               dma_unmap_single(parent,
-                                               desc->lli.daddr,
-                                               desc->len, DMA_FROM_DEVICE);
-                       else
-                               dma_unmap_page(parent,
-                                               desc->lli.daddr,
-                                               desc->len, DMA_FROM_DEVICE);
-               }
-               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                               dma_unmap_single(parent,
-                                               desc->lli.saddr,
-                                               desc->len, DMA_TO_DEVICE);
-                       else
-                               dma_unmap_page(parent,
-                                               desc->lli.saddr,
-                                               desc->len, DMA_TO_DEVICE);
-               }
-       }
-
+       dma_descriptor_unmap(txd);
        /* for cyclic transfers,
         * no need to replay callback function while stopping */
        if (!atc_chan_is_cyclic(atchan)) {
@@ -1102,7 +1078,7 @@ atc_tx_status(struct dma_chan *chan,
        int bytes = 0;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
        /*
         * There's no point calculating the residue if there's
index 31011d2a26fcfa0510b64c1114c1f87c05762ec3..3c6716e0b78eee2592a8c977550ce036ab4318c5 100644 (file)
@@ -2369,7 +2369,7 @@ coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
        enum dma_status ret;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        dma_set_residue(txstate, coh901318_get_bytes_left(chan));
@@ -2694,7 +2694,7 @@ static int __init coh901318_probe(struct platform_device *pdev)
        if (irq < 0)
                return irq;
 
-       err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, IRQF_DISABLED,
+       err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, 0,
                               "coh901318", base);
        if (err)
                return err;
index 7c82b92f9b16f18bf1ce4e6bedb988f5d5b70e22..c29dacff66fa951f136657536a628b6743ccda30 100644 (file)
@@ -141,6 +141,9 @@ struct cppi41_dd {
        const struct chan_queues *queues_rx;
        const struct chan_queues *queues_tx;
        struct chan_queues td_queue;
+
+       /* context for suspend/resume */
+       unsigned int dma_tdfdq;
 };
 
 #define FIST_COMPLETION_QUEUE  93
@@ -263,6 +266,15 @@ static u32 pd_trans_len(u32 val)
        return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
 }
 
+static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
+{
+       u32 desc;
+
+       desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
+       desc &= ~0x1f;
+       return desc;
+}
+
 static irqreturn_t cppi41_irq(int irq, void *data)
 {
        struct cppi41_dd *cdd = data;
@@ -300,8 +312,7 @@ static irqreturn_t cppi41_irq(int irq, void *data)
                        q_num = __fls(val);
                        val &= ~(1 << q_num);
                        q_num += 32 * i;
-                       desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(q_num));
-                       desc &= ~0x1f;
+                       desc = cppi41_pop_desc(cdd, q_num);
                        c = desc_to_chan(cdd, desc);
                        if (WARN_ON(!c)) {
                                pr_err("%s() q %d desc %08x\n", __func__,
@@ -353,7 +364,7 @@ static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
 
        /* lock */
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (txstate && ret == DMA_SUCCESS)
+       if (txstate && ret == DMA_COMPLETE)
                txstate->residue = c->residue;
        /* unlock */
 
@@ -517,15 +528,6 @@ static void cppi41_compute_td_desc(struct cppi41_desc *d)
        d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
 }
 
-static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
-{
-       u32 desc;
-
-       desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
-       desc &= ~0x1f;
-       return desc;
-}
-
 static int cppi41_tear_down_chan(struct cppi41_channel *c)
 {
        struct cppi41_dd *cdd = c->cdd;
@@ -561,36 +563,26 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
                c->td_retry = 100;
        }
 
-       if (!c->td_seen) {
-               unsigned td_comp_queue;
+       if (!c->td_seen || !c->td_desc_seen) {
 
-               if (c->is_tx)
-                       td_comp_queue =  cdd->td_queue.complete;
-               else
-                       td_comp_queue =  c->q_comp_num;
+               desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
+               if (!desc_phys)
+                       desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
 
-               desc_phys = cppi41_pop_desc(cdd, td_comp_queue);
-               if (desc_phys) {
-                       __iormb();
+               if (desc_phys == c->desc_phys) {
+                       c->td_desc_seen = 1;
+
+               } else if (desc_phys == td_desc_phys) {
+                       u32 pd0;
 
-                       if (desc_phys == td_desc_phys) {
-                               u32 pd0;
-                               pd0 = td->pd0;
-                               WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
-                               WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
-                               WARN_ON((pd0 & 0x1f) != c->port_num);
-                       } else {
-                               WARN_ON_ONCE(1);
-                       }
-                       c->td_seen = 1;
-               }
-       }
-       if (!c->td_desc_seen) {
-               desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
-               if (desc_phys) {
                        __iormb();
-                       WARN_ON(c->desc_phys != desc_phys);
-                       c->td_desc_seen = 1;
+                       pd0 = td->pd0;
+                       WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
+                       WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
+                       WARN_ON((pd0 & 0x1f) != c->port_num);
+                       c->td_seen = 1;
+               } else if (desc_phys) {
+                       WARN_ON_ONCE(1);
                }
        }
        c->td_retry--;
@@ -609,7 +601,7 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
 
        WARN_ON(!c->td_retry);
        if (!c->td_desc_seen) {
-               desc_phys = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
+               desc_phys = cppi41_pop_desc(cdd, c->q_num);
                WARN_ON(!desc_phys);
        }
 
@@ -674,14 +666,14 @@ static void cleanup_chans(struct cppi41_dd *cdd)
        }
 }
 
-static int cppi41_add_chans(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
 {
        struct cppi41_channel *cchan;
        int i;
        int ret;
        u32 n_chans;
 
-       ret = of_property_read_u32(pdev->dev.of_node, "#dma-channels",
+       ret = of_property_read_u32(dev->of_node, "#dma-channels",
                        &n_chans);
        if (ret)
                return ret;
@@ -719,7 +711,7 @@ err:
        return -ENOMEM;
 }
 
-static void purge_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
+static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
 {
        unsigned int mem_decs;
        int i;
@@ -731,7 +723,7 @@ static void purge_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
                cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
                cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));
 
-               dma_free_coherent(&pdev->dev, mem_decs, cdd->cd,
+               dma_free_coherent(dev, mem_decs, cdd->cd,
                                cdd->descs_phys);
        }
 }
@@ -741,19 +733,19 @@ static void disable_sched(struct cppi41_dd *cdd)
        cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
 }
 
-static void deinit_cpii41(struct platform_device *pdev, struct cppi41_dd *cdd)
+static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
 {
        disable_sched(cdd);
 
-       purge_descs(pdev, cdd);
+       purge_descs(dev, cdd);
 
        cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
        cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
-       dma_free_coherent(&pdev->dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
+       dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
                        cdd->scratch_phys);
 }
 
-static int init_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int init_descs(struct device *dev, struct cppi41_dd *cdd)
 {
        unsigned int desc_size;
        unsigned int mem_decs;
@@ -777,7 +769,7 @@ static int init_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
                reg |= ilog2(ALLOC_DECS_NUM) - 5;
 
                BUILD_BUG_ON(DESCS_AREAS != 1);
-               cdd->cd = dma_alloc_coherent(&pdev->dev, mem_decs,
+               cdd->cd = dma_alloc_coherent(dev, mem_decs,
                                &cdd->descs_phys, GFP_KERNEL);
                if (!cdd->cd)
                        return -ENOMEM;
@@ -813,12 +805,12 @@ static void init_sched(struct cppi41_dd *cdd)
        cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
 }
 
-static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
 {
        int ret;
 
        BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
-       cdd->qmgr_scratch = dma_alloc_coherent(&pdev->dev, QMGR_SCRATCH_SIZE,
+       cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
                        &cdd->scratch_phys, GFP_KERNEL);
        if (!cdd->qmgr_scratch)
                return -ENOMEM;
@@ -827,7 +819,7 @@ static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
        cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
        cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
 
-       ret = init_descs(pdev, cdd);
+       ret = init_descs(dev, cdd);
        if (ret)
                goto err_td;
 
@@ -835,7 +827,7 @@ static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
        init_sched(cdd);
        return 0;
 err_td:
-       deinit_cpii41(pdev, cdd);
+       deinit_cppi41(dev, cdd);
        return ret;
 }
 
@@ -914,11 +906,11 @@ static const struct of_device_id cppi41_dma_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
 
-static const struct cppi_glue_infos *get_glue_info(struct platform_device *pdev)
+static const struct cppi_glue_infos *get_glue_info(struct device *dev)
 {
        const struct of_device_id *of_id;
 
-       of_id = of_match_node(cppi41_dma_ids, pdev->dev.of_node);
+       of_id = of_match_node(cppi41_dma_ids, dev->of_node);
        if (!of_id)
                return NULL;
        return of_id->data;
@@ -927,11 +919,12 @@ static const struct cppi_glue_infos *get_glue_info(struct platform_device *pdev)
 static int cppi41_dma_probe(struct platform_device *pdev)
 {
        struct cppi41_dd *cdd;
+       struct device *dev = &pdev->dev;
        const struct cppi_glue_infos *glue_info;
        int irq;
        int ret;
 
-       glue_info = get_glue_info(pdev);
+       glue_info = get_glue_info(dev);
        if (!glue_info)
                return -EINVAL;
 
@@ -946,14 +939,14 @@ static int cppi41_dma_probe(struct platform_device *pdev)
        cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
        cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
        cdd->ddev.device_control = cppi41_dma_control;
-       cdd->ddev.dev = &pdev->dev;
+       cdd->ddev.dev = dev;
        INIT_LIST_HEAD(&cdd->ddev.channels);
        cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
 
-       cdd->usbss_mem = of_iomap(pdev->dev.of_node, 0);
-       cdd->ctrl_mem = of_iomap(pdev->dev.of_node, 1);
-       cdd->sched_mem = of_iomap(pdev->dev.of_node, 2);
-       cdd->qmgr_mem = of_iomap(pdev->dev.of_node, 3);
+       cdd->usbss_mem = of_iomap(dev->of_node, 0);
+       cdd->ctrl_mem = of_iomap(dev->of_node, 1);
+       cdd->sched_mem = of_iomap(dev->of_node, 2);
+       cdd->qmgr_mem = of_iomap(dev->of_node, 3);
 
        if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem ||
                        !cdd->qmgr_mem) {
@@ -961,31 +954,31 @@ static int cppi41_dma_probe(struct platform_device *pdev)
                goto err_remap;
        }
 
-       pm_runtime_enable(&pdev->dev);
-       ret = pm_runtime_get_sync(&pdev->dev);
-       if (ret)
+       pm_runtime_enable(dev);
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0)
                goto err_get_sync;
 
        cdd->queues_rx = glue_info->queues_rx;
        cdd->queues_tx = glue_info->queues_tx;
        cdd->td_queue = glue_info->td_queue;
 
-       ret = init_cppi41(pdev, cdd);
+       ret = init_cppi41(dev, cdd);
        if (ret)
                goto err_init_cppi;
 
-       ret = cppi41_add_chans(pdev, cdd);
+       ret = cppi41_add_chans(dev, cdd);
        if (ret)
                goto err_chans;
 
-       irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+       irq = irq_of_parse_and_map(dev->of_node, 0);
        if (!irq)
                goto err_irq;
 
        cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);
 
        ret = request_irq(irq, glue_info->isr, IRQF_SHARED,
-                       dev_name(&pdev->dev), cdd);
+                       dev_name(dev), cdd);
        if (ret)
                goto err_irq;
        cdd->irq = irq;
@@ -994,7 +987,7 @@ static int cppi41_dma_probe(struct platform_device *pdev)
        if (ret)
                goto err_dma_reg;
 
-       ret = of_dma_controller_register(pdev->dev.of_node,
+       ret = of_dma_controller_register(dev->of_node,
                        cppi41_dma_xlate, &cpp41_dma_info);
        if (ret)
                goto err_of;
@@ -1009,11 +1002,11 @@ err_irq:
        cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
        cleanup_chans(cdd);
 err_chans:
-       deinit_cpii41(pdev, cdd);
+       deinit_cppi41(dev, cdd);
 err_init_cppi:
-       pm_runtime_put(&pdev->dev);
+       pm_runtime_put(dev);
 err_get_sync:
-       pm_runtime_disable(&pdev->dev);
+       pm_runtime_disable(dev);
        iounmap(cdd->usbss_mem);
        iounmap(cdd->ctrl_mem);
        iounmap(cdd->sched_mem);
@@ -1033,7 +1026,7 @@ static int cppi41_dma_remove(struct platform_device *pdev)
        cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
        free_irq(cdd->irq, cdd);
        cleanup_chans(cdd);
-       deinit_cpii41(pdev, cdd);
+       deinit_cppi41(&pdev->dev, cdd);
        iounmap(cdd->usbss_mem);
        iounmap(cdd->ctrl_mem);
        iounmap(cdd->sched_mem);
@@ -1044,12 +1037,53 @@ static int cppi41_dma_remove(struct platform_device *pdev)
        return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int cppi41_suspend(struct device *dev)
+{
+       struct cppi41_dd *cdd = dev_get_drvdata(dev);
+
+       cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
+       cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
+       disable_sched(cdd);
+
+       return 0;
+}
+
+static int cppi41_resume(struct device *dev)
+{
+       struct cppi41_dd *cdd = dev_get_drvdata(dev);
+       struct cppi41_channel *c;
+       int i;
+
+       for (i = 0; i < DESCS_AREAS; i++)
+               cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
+
+       list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
+               if (!c->is_tx)
+                       cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
+
+       init_sched(cdd);
+
+       cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
+       cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+       cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+       cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
+
+       cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);
+
+       return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(cppi41_pm_ops, cppi41_suspend, cppi41_resume);
+
 static struct platform_driver cpp41_dma_driver = {
        .probe  = cppi41_dma_probe,
        .remove = cppi41_dma_remove,
        .driver = {
                .name = "cppi41-dma-engine",
                .owner = THIS_MODULE,
+               .pm = &cppi41_pm_ops,
                .of_match_table = of_match_ptr(cppi41_dma_ids),
        },
 };
index b0c0c8268d42bb023ac94ca8f420e22a5a197c37..94c380f0753860c4c4002c19f4217332fa2460f0 100644 (file)
@@ -491,7 +491,7 @@ static enum dma_status jz4740_dma_tx_status(struct dma_chan *c,
        unsigned long flags;
 
        status = dma_cookie_status(c, cookie, state);
-       if (status == DMA_SUCCESS || !state)
+       if (status == DMA_COMPLETE || !state)
                return status;
 
        spin_lock_irqsave(&chan->vchan.lock, flags);
index 9162ac80c18f303ac9a509eb97298eba33d4753b..ea806bdc12ef92418c528be0b950758de59c3ee7 100644 (file)
@@ -65,6 +65,7 @@
 #include <linux/acpi.h>
 #include <linux/acpi_dma.h>
 #include <linux/of_dma.h>
+#include <linux/mempool.h>
 
 static DEFINE_MUTEX(dma_list_mutex);
 static DEFINE_IDR(dma_idr);
@@ -901,98 +902,132 @@ void dma_async_device_unregister(struct dma_device *device)
 }
 EXPORT_SYMBOL(dma_async_device_unregister);
 
-/**
- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
- * @chan: DMA channel to offload copy to
- * @dest: destination address (virtual)
- * @src: source address (virtual)
- * @len: length
- *
- * Both @dest and @src must be mappable to a bus address according to the
- * DMA mapping API rules for streaming mappings.
- * Both @dest and @src must stay memory resident (kernel memory or locked
- * user space pages).
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
-                       void *src, size_t len)
-{
-       struct dma_device *dev = chan->device;
-       struct dma_async_tx_descriptor *tx;
-       dma_addr_t dma_dest, dma_src;
-       dma_cookie_t cookie;
-       unsigned long flags;
+struct dmaengine_unmap_pool {
+       struct kmem_cache *cache;
+       const char *name;
+       mempool_t *pool;
+       size_t size;
+};
 
-       dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
-       dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
-       flags = DMA_CTRL_ACK |
-               DMA_COMPL_SRC_UNMAP_SINGLE |
-               DMA_COMPL_DEST_UNMAP_SINGLE;
-       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) }
+static struct dmaengine_unmap_pool unmap_pool[] = {
+       __UNMAP_POOL(2),
+       #if IS_ENABLED(CONFIG_ASYNC_TX_DMA)
+       __UNMAP_POOL(16),
+       __UNMAP_POOL(128),
+       __UNMAP_POOL(256),
+       #endif
+};
 
-       if (!tx) {
-               dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
-               dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
-               return -ENOMEM;
+static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
+{
+       int order = get_count_order(nr);
+
+       switch (order) {
+       case 0 ... 1:
+               return &unmap_pool[0];
+       case 2 ... 4:
+               return &unmap_pool[1];
+       case 5 ... 7:
+               return &unmap_pool[2];
+       case 8:
+               return &unmap_pool[3];
+       default:
+               BUG();
+               return NULL;
        }
+}
 
-       tx->callback = NULL;
-       cookie = tx->tx_submit(tx);
+static void dmaengine_unmap(struct kref *kref)
+{
+       struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref);
+       struct device *dev = unmap->dev;
+       int cnt, i;
+
+       cnt = unmap->to_cnt;
+       for (i = 0; i < cnt; i++)
+               dma_unmap_page(dev, unmap->addr[i], unmap->len,
+                              DMA_TO_DEVICE);
+       cnt += unmap->from_cnt;
+       for (; i < cnt; i++)
+               dma_unmap_page(dev, unmap->addr[i], unmap->len,
+                              DMA_FROM_DEVICE);
+       cnt += unmap->bidi_cnt;
+       for (; i < cnt; i++) {
+               if (unmap->addr[i] == 0)
+                       continue;
+               dma_unmap_page(dev, unmap->addr[i], unmap->len,
+                              DMA_BIDIRECTIONAL);
+       }
+       mempool_free(unmap, __get_unmap_pool(cnt)->pool);
+}
 
-       preempt_disable();
-       __this_cpu_add(chan->local->bytes_transferred, len);
-       __this_cpu_inc(chan->local->memcpy_count);
-       preempt_enable();
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+       if (unmap)
+               kref_put(&unmap->kref, dmaengine_unmap);
+}
+EXPORT_SYMBOL_GPL(dmaengine_unmap_put);
 
-       return cookie;
+static void dmaengine_destroy_unmap_pool(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+               struct dmaengine_unmap_pool *p = &unmap_pool[i];
+
+               if (p->pool)
+                       mempool_destroy(p->pool);
+               p->pool = NULL;
+               if (p->cache)
+                       kmem_cache_destroy(p->cache);
+               p->cache = NULL;
+       }
 }
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
 
-/**
- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
- * @chan: DMA channel to offload copy to
- * @page: destination page
- * @offset: offset in page to copy to
- * @kdata: source address (virtual)
- * @len: length
- *
- * Both @page/@offset and @kdata must be mappable to a bus address according
- * to the DMA mapping API rules for streaming mappings.
- * Both @page/@offset and @kdata must stay memory resident (kernel memory or
- * locked user space pages)
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
-                       unsigned int offset, void *kdata, size_t len)
+static int __init dmaengine_init_unmap_pool(void)
 {
-       struct dma_device *dev = chan->device;
-       struct dma_async_tx_descriptor *tx;
-       dma_addr_t dma_dest, dma_src;
-       dma_cookie_t cookie;
-       unsigned long flags;
+       int i;
 
-       dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
-       dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
-       flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE;
-       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+       for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+               struct dmaengine_unmap_pool *p = &unmap_pool[i];
+               size_t size;
 
-       if (!tx) {
-               dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
-               dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
-               return -ENOMEM;
+               size = sizeof(struct dmaengine_unmap_data) +
+                      sizeof(dma_addr_t) * p->size;
+
+               p->cache = kmem_cache_create(p->name, size, 0,
+                                            SLAB_HWCACHE_ALIGN, NULL);
+               if (!p->cache)
+                       break;
+               p->pool = mempool_create_slab_pool(1, p->cache);
+               if (!p->pool)
+                       break;
        }
 
-       tx->callback = NULL;
-       cookie = tx->tx_submit(tx);
+       if (i == ARRAY_SIZE(unmap_pool))
+               return 0;
 
-       preempt_disable();
-       __this_cpu_add(chan->local->bytes_transferred, len);
-       __this_cpu_inc(chan->local->memcpy_count);
-       preempt_enable();
+       dmaengine_destroy_unmap_pool();
+       return -ENOMEM;
+}
 
-       return cookie;
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+       struct dmaengine_unmap_data *unmap;
+
+       unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags);
+       if (!unmap)
+               return NULL;
+
+       memset(unmap, 0, sizeof(*unmap));
+       kref_init(&unmap->kref);
+       unmap->dev = dev;
+
+       return unmap;
 }
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+EXPORT_SYMBOL(dmaengine_get_unmap_data);
 
 /**
  * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
@@ -1015,24 +1050,33 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
 {
        struct dma_device *dev = chan->device;
        struct dma_async_tx_descriptor *tx;
-       dma_addr_t dma_dest, dma_src;
+       struct dmaengine_unmap_data *unmap;
        dma_cookie_t cookie;
        unsigned long flags;
 
-       dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
-       dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
-                               DMA_FROM_DEVICE);
+       unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOIO);
+       if (!unmap)
+               return -ENOMEM;
+
+       unmap->to_cnt = 1;
+       unmap->from_cnt = 1;
+       unmap->addr[0] = dma_map_page(dev->dev, src_pg, src_off, len,
+                                     DMA_TO_DEVICE);
+       unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len,
+                                     DMA_FROM_DEVICE);
+       unmap->len = len;
        flags = DMA_CTRL_ACK;
-       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+       tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0],
+                                        len, flags);
 
        if (!tx) {
-               dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
-               dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+               dmaengine_unmap_put(unmap);
                return -ENOMEM;
        }
 
-       tx->callback = NULL;
+       dma_set_unmap(tx, unmap);
        cookie = tx->tx_submit(tx);
+       dmaengine_unmap_put(unmap);
 
        preempt_disable();
        __this_cpu_add(chan->local->bytes_transferred, len);
@@ -1043,6 +1087,52 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
 }
 EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
 
+/**
+ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+ * @chan: DMA channel to offload copy to
+ * @dest: destination address (virtual)
+ * @src: source address (virtual)
+ * @len: length
+ *
+ * Both @dest and @src must be mappable to a bus address according to the
+ * DMA mapping API rules for streaming mappings.
+ * Both @dest and @src must stay memory resident (kernel memory or locked
+ * user space pages).
+ *
+ * Thin wrapper: each address is split into its struct page (virt_to_page())
+ * and in-page offset (address & ~PAGE_MASK) and forwarded to
+ * dma_async_memcpy_pg_to_pg(), whose return value is passed back unchanged.
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
+                           void *src, size_t len)
+{
+       return dma_async_memcpy_pg_to_pg(chan, virt_to_page(dest),
+                                        (unsigned long) dest & ~PAGE_MASK,
+                                        virt_to_page(src),
+                                        (unsigned long) src & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+
+/**
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+ * @chan: DMA channel to offload copy to
+ * @page: destination page
+ * @offset: offset in page to copy to
+ * @kdata: source address (virtual)
+ * @len: length
+ *
+ * Both @page/@offset and @kdata must be mappable to a bus address according
+ * to the DMA mapping API rules for streaming mappings.
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+ * locked user space pages).
+ *
+ * Thin wrapper: @page/@offset are used as the destination as given, @kdata
+ * is split into its struct page (virt_to_page()) and in-page offset
+ * (address & ~PAGE_MASK), and the copy is forwarded to
+ * dma_async_memcpy_pg_to_pg(), whose return value is passed back unchanged.
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
+                          unsigned int offset, void *kdata, size_t len)
+{
+       return dma_async_memcpy_pg_to_pg(chan, page, offset,
+                                        virt_to_page(kdata),
+                                        (unsigned long) kdata & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+
 void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
        struct dma_chan *chan)
 {
@@ -1062,7 +1152,7 @@ dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
        unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
 
        if (!tx)
-               return DMA_SUCCESS;
+               return DMA_COMPLETE;
 
        while (tx->cookie == -EBUSY) {
                if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
@@ -1116,6 +1206,10 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies);
 
 static int __init dma_bus_init(void)
 {
+       int err = dmaengine_init_unmap_pool();
+
+       if (err)
+               return err;
        return class_register(&dma_devclass);
 }
 arch_initcall(dma_bus_init);
index 92f796cdc6ab1dc12c6b895fb2d6e16e5dd92264..20f9a3aaf9266ea6daa71a18f08d258afa8a1a1e 100644 (file)
@@ -8,6 +8,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
-#include <linux/ctype.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include <linux/seq_file.h>
 
 static unsigned int test_buf_size = 16384;
 module_param(test_buf_size, uint, S_IRUGO | S_IWUSR);
@@ -68,92 +66,13 @@ module_param(timeout, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
                 "Pass -1 for infinite timeout");
 
-/* Maximum amount of mismatched bytes in buffer to print */
-#define MAX_ERROR_COUNT                32
-
-/*
- * Initialization patterns. All bytes in the source buffer has bit 7
- * set, all bytes in the destination buffer has bit 7 cleared.
- *
- * Bit 6 is set for all bytes which are to be copied by the DMA
- * engine. Bit 5 is set for all bytes which are to be overwritten by
- * the DMA engine.
- *
- * The remaining bits are the inverse of a counter which increments by
- * one for each byte address.
- */
-#define PATTERN_SRC            0x80
-#define PATTERN_DST            0x00
-#define PATTERN_COPY           0x40
-#define PATTERN_OVERWRITE      0x20
-#define PATTERN_COUNT_MASK     0x1f
-
-enum dmatest_error_type {
-       DMATEST_ET_OK,
-       DMATEST_ET_MAP_SRC,
-       DMATEST_ET_MAP_DST,
-       DMATEST_ET_PREP,
-       DMATEST_ET_SUBMIT,
-       DMATEST_ET_TIMEOUT,
-       DMATEST_ET_DMA_ERROR,
-       DMATEST_ET_DMA_IN_PROGRESS,
-       DMATEST_ET_VERIFY,
-       DMATEST_ET_VERIFY_BUF,
-};
-
-struct dmatest_verify_buffer {
-       unsigned int    index;
-       u8              expected;
-       u8              actual;
-};
-
-struct dmatest_verify_result {
-       unsigned int                    error_count;
-       struct dmatest_verify_buffer    data[MAX_ERROR_COUNT];
-       u8                              pattern;
-       bool                            is_srcbuf;
-};
-
-struct dmatest_thread_result {
-       struct list_head        node;
-       unsigned int            n;
-       unsigned int            src_off;
-       unsigned int            dst_off;
-       unsigned int            len;
-       enum dmatest_error_type type;
-       union {
-               unsigned long                   data;
-               dma_cookie_t                    cookie;
-               enum dma_status                 status;
-               int                             error;
-               struct dmatest_verify_result    *vr;
-       };
-};
-
-struct dmatest_result {
-       struct list_head        node;
-       char                    *name;
-       struct list_head        results;
-};
-
-struct dmatest_info;
-
-struct dmatest_thread {
-       struct list_head        node;
-       struct dmatest_info     *info;
-       struct task_struct      *task;
-       struct dma_chan         *chan;
-       u8                      **srcs;
-       u8                      **dsts;
-       enum dma_transaction_type type;
-       bool                    done;
-};
+static bool noverify;
+module_param(noverify, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(noverify, "Disable random data setup and verification");
 
-struct dmatest_chan {
-       struct list_head        node;
-       struct dma_chan         *chan;
-       struct list_head        threads;
-};
+static bool verbose;
+module_param(verbose, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(verbose, "Enable \"success\" result messages (default: off)");
 
 /**
  * struct dmatest_params - test parameters.
@@ -177,6 +96,7 @@ struct dmatest_params {
        unsigned int    xor_sources;
        unsigned int    pq_sources;
        int             timeout;
+       bool            noverify;
 };
 
 /**
@@ -184,7 +104,7 @@ struct dmatest_params {
  * @params:            test parameters
  * @lock:              access protection to the fields of this structure
  */
-struct dmatest_info {
+static struct dmatest_info {
        /* Test parameters */
        struct dmatest_params   params;
 
@@ -192,16 +112,95 @@ struct dmatest_info {
        struct list_head        channels;
        unsigned int            nr_channels;
        struct mutex            lock;
+       bool                    did_init;
+} test_info = {
+       .channels = LIST_HEAD_INIT(test_info.channels),
+       .lock = __MUTEX_INITIALIZER(test_info.lock),
+};
+
+static int dmatest_run_set(const char *val, const struct kernel_param *kp);
+static int dmatest_run_get(char *val, const struct kernel_param *kp);
+static struct kernel_param_ops run_ops = {
+       .set = dmatest_run_set,
+       .get = dmatest_run_get,
+};
+static bool dmatest_run;
+module_param_cb(run, &run_ops, &dmatest_run, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(run, "Run the test (default: false)");
+
+/* Maximum amount of mismatched bytes in buffer to print */
+#define MAX_ERROR_COUNT                32
+
+/*
+ * Initialization patterns. All bytes in the source buffer have bit 7
+ * set, all bytes in the destination buffer have bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC            0x80
+#define PATTERN_DST            0x00
+#define PATTERN_COPY           0x40
+#define PATTERN_OVERWRITE      0x20
+#define PATTERN_COUNT_MASK     0x1f
 
-       /* debugfs related stuff */
-       struct dentry           *root;
+struct dmatest_thread {
+       struct list_head        node;
+       struct dmatest_info     *info;
+       struct task_struct      *task;
+       struct dma_chan         *chan;
+       u8                      **srcs;
+       u8                      **dsts;
+       enum dma_transaction_type type;
+       bool                    done;
+};
 
-       /* Test results */
-       struct list_head        results;
-       struct mutex            results_lock;
+struct dmatest_chan {
+       struct list_head        node;
+       struct dma_chan         *chan;
+       struct list_head        threads;
 };
 
-static struct dmatest_info test_info;
+static DECLARE_WAIT_QUEUE_HEAD(thread_wait);
+static bool wait;
+
+/*
+ * Return true while at least one thread on any channel in @info's channel
+ * list has not yet set its ->done flag.
+ */
+static bool is_threaded_test_run(struct dmatest_info *info)
+{
+       struct dmatest_thread *thr;
+       struct dmatest_chan *chan;
+
+       list_for_each_entry(chan, &info->channels, node) {
+               list_for_each_entry(thr, &chan->threads, node)
+                       if (!thr->done)
+                               return true;
+       }
+
+       return false;
+}
+
+/*
+ * Getter for the "wait" module parameter.  When a non-zero iteration count
+ * is configured, sleep on thread_wait until no test thread is still running;
+ * then set the flag and let param_get_bool() format it into @val.
+ */
+static int dmatest_wait_get(char *val, const struct kernel_param *kp)
+{
+       if (test_info.params.iterations)
+               wait_event(thread_wait, !is_threaded_test_run(&test_info));
+
+       wait = true;
+
+       return param_get_bool(val, kp);
+}
+
+static struct kernel_param_ops wait_ops = {
+       .get = dmatest_wait_get,
+       .set = param_set_bool,
+};
+module_param_cb(wait, &wait_ops, &wait, S_IRUGO);
+MODULE_PARM_DESC(wait, "Wait for tests to complete (default: false)");
 
 static bool dmatest_match_channel(struct dmatest_params *params,
                struct dma_chan *chan)
@@ -223,7 +222,7 @@ static unsigned long dmatest_random(void)
 {
        unsigned long buf;
 
-       get_random_bytes(&buf, sizeof(buf));
+       prandom_bytes(&buf, sizeof(buf));
        return buf;
 }
 
@@ -262,9 +261,31 @@ static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len,
        }
 }
 
-static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
-               unsigned int start, unsigned int end, unsigned int counter,
-               u8 pattern, bool is_srcbuf)
+/*
+ * Report a single byte that failed verification.  The message distinguishes
+ * a clobbered source buffer from the three destination-buffer failure modes.
+ */
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+               unsigned int counter, bool is_srcbuf)
+{
+       const char *comm = current->comm;
+       const u8 want = pattern | (~counter & PATTERN_COUNT_MASK);
+       const u8 delta = actual ^ pattern;
+
+       if (is_srcbuf) {
+               pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n",
+                       comm, index, want, actual);
+               return;
+       }
+
+       if ((pattern & PATTERN_COPY) &&
+           (delta & (PATTERN_COPY | PATTERN_OVERWRITE))) {
+               pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n",
+                       comm, index, want, actual);
+               return;
+       }
+
+       if (delta & PATTERN_SRC)
+               pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n",
+                       comm, index, want, actual);
+       else
+               pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n",
+                       comm, index, want, actual);
+}
+
+static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
+               unsigned int end, unsigned int counter, u8 pattern,
+               bool is_srcbuf)
 {
        unsigned int i;
        unsigned int error_count = 0;
@@ -272,7 +293,6 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
        u8 expected;
        u8 *buf;
        unsigned int counter_orig = counter;
-       struct dmatest_verify_buffer *vb;
 
        for (; (buf = *bufs); bufs++) {
                counter = counter_orig;
@@ -280,12 +300,9 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
                        actual = buf[i];
                        expected = pattern | (~counter & PATTERN_COUNT_MASK);
                        if (actual != expected) {
-                               if (error_count < MAX_ERROR_COUNT && vr) {
-                                       vb = &vr->data[error_count];
-                                       vb->index = i;
-                                       vb->expected = expected;
-                                       vb->actual = actual;
-                               }
+                               if (error_count < MAX_ERROR_COUNT)
+                                       dmatest_mismatch(actual, pattern, i,
+                                                        counter, is_srcbuf);
                                error_count++;
                        }
                        counter++;
@@ -293,7 +310,7 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
        }
 
        if (error_count > MAX_ERROR_COUNT)
-               pr_warning("%s: %u errors suppressed\n",
+               pr_warn("%s: %u errors suppressed\n",
                        current->comm, error_count - MAX_ERROR_COUNT);
 
        return error_count;
@@ -313,20 +330,6 @@ static void dmatest_callback(void *arg)
        wake_up_all(done->wait);
 }
 
-static inline void unmap_src(struct device *dev, dma_addr_t *addr, size_t len,
-                            unsigned int count)
-{
-       while (count--)
-               dma_unmap_single(dev, addr[count], len, DMA_TO_DEVICE);
-}
-
-static inline void unmap_dst(struct device *dev, dma_addr_t *addr, size_t len,
-                            unsigned int count)
-{
-       while (count--)
-               dma_unmap_single(dev, addr[count], len, DMA_BIDIRECTIONAL);
-}
-
 static unsigned int min_odd(unsigned int x, unsigned int y)
 {
        unsigned int val = min(x, y);
@@ -334,172 +337,49 @@ static unsigned int min_odd(unsigned int x, unsigned int y)
        return val % 2 ? val : val - 1;
 }
 
-static char *verify_result_get_one(struct dmatest_verify_result *vr,
-               unsigned int i)
+/*
+ * Log the outcome of test number @n at info level, tagged with the current
+ * task's name.  @err is the human-readable outcome, @src_off/@dst_off/@len
+ * describe the transfer, and @data is an outcome-specific value printed
+ * as %lu.
+ */
+static void result(const char *err, unsigned int n, unsigned int src_off,
+                  unsigned int dst_off, unsigned int len, unsigned long data)
 {
-       struct dmatest_verify_buffer *vb = &vr->data[i];
-       u8 diff = vb->actual ^ vr->pattern;
-       static char buf[512];
-       char *msg;
-
-       if (vr->is_srcbuf)
-               msg = "srcbuf overwritten!";
-       else if ((vr->pattern & PATTERN_COPY)
-                       && (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
-               msg = "dstbuf not copied!";
-       else if (diff & PATTERN_SRC)
-               msg = "dstbuf was copied!";
-       else
-               msg = "dstbuf mismatch!";
-
-       snprintf(buf, sizeof(buf) - 1, "%s [0x%x] Expected %02x, got %02x", msg,
-                vb->index, vb->expected, vb->actual);
-
-       return buf;
+       /* the trailing newline terminates the log record */
+       pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)\n",
+               current->comm, n, err, src_off, dst_off, len, data);
 }
 
-static char *thread_result_get(const char *name,
-               struct dmatest_thread_result *tr)
+/*
+ * Debug-level twin of result(): same message layout, emitted via pr_debug()
+ * so it only shows up when debug output is enabled for this file.
+ */
+static void dbg_result(const char *err, unsigned int n, unsigned int src_off,
+                      unsigned int dst_off, unsigned int len,
+                      unsigned long data)
 {
-       static const char * const messages[] = {
-               [DMATEST_ET_OK]                 = "No errors",
-               [DMATEST_ET_MAP_SRC]            = "src mapping error",
-               [DMATEST_ET_MAP_DST]            = "dst mapping error",
-               [DMATEST_ET_PREP]               = "prep error",
-               [DMATEST_ET_SUBMIT]             = "submit error",
-               [DMATEST_ET_TIMEOUT]            = "test timed out",
-               [DMATEST_ET_DMA_ERROR]          =
-                       "got completion callback (DMA_ERROR)",
-               [DMATEST_ET_DMA_IN_PROGRESS]    =
-                       "got completion callback (DMA_IN_PROGRESS)",
-               [DMATEST_ET_VERIFY]             = "errors",
-               [DMATEST_ET_VERIFY_BUF]         = "verify errors",
-       };
-       static char buf[512];
-
-       snprintf(buf, sizeof(buf) - 1,
-                "%s: #%u: %s with src_off=0x%x ""dst_off=0x%x len=0x%x (%lu)",
-                name, tr->n, messages[tr->type], tr->src_off, tr->dst_off,
-                tr->len, tr->data);
-
-       return buf;
+       /* the trailing newline terminates the log record */
+       pr_debug("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)\n",
+                  current->comm, n, err, src_off, dst_off, len, data);
 }
 
-static int thread_result_add(struct dmatest_info *info,
-               struct dmatest_result *r, enum dmatest_error_type type,
-               unsigned int n, unsigned int src_off, unsigned int dst_off,
-               unsigned int len, unsigned long data)
-{
-       struct dmatest_thread_result *tr;
-
-       tr = kzalloc(sizeof(*tr), GFP_KERNEL);
-       if (!tr)
-               return -ENOMEM;
-
-       tr->type = type;
-       tr->n = n;
-       tr->src_off = src_off;
-       tr->dst_off = dst_off;
-       tr->len = len;
-       tr->data = data;
+#define verbose_result(err, n, src_off, dst_off, len, data) ({ \
+       if (verbose) \
+               result(err, n, src_off, dst_off, len, data); \
+       else \
+               dbg_result(err, n, src_off, dst_off, len, data); \
+})
 
-       mutex_lock(&info->results_lock);
-       list_add_tail(&tr->node, &r->results);
-       mutex_unlock(&info->results_lock);
-
-       if (tr->type == DMATEST_ET_OK)
-               pr_debug("%s\n", thread_result_get(r->name, tr));
-       else
-               pr_warn("%s\n", thread_result_get(r->name, tr));
-
-       return 0;
-}
-
-static unsigned int verify_result_add(struct dmatest_info *info,
-               struct dmatest_result *r, unsigned int n,
-               unsigned int src_off, unsigned int dst_off, unsigned int len,
-               u8 **bufs, int whence, unsigned int counter, u8 pattern,
-               bool is_srcbuf)
+/*
+ * Convert @val events counted over @runtime into a per-second rate:
+ * val * 1000000 / runtime.  The 1000000 factor implies @runtime is in
+ * microseconds -- confirm against the caller.  Returns 0 for a
+ * non-positive @runtime.
+ */
+static unsigned long long dmatest_persec(s64 runtime, unsigned int val)
 {
-       struct dmatest_verify_result *vr;
-       unsigned int error_count;
-       unsigned int buf_off = is_srcbuf ? src_off : dst_off;
-       unsigned int start, end;
-
-       if (whence < 0) {
-               start = 0;
-               end = buf_off;
-       } else if (whence > 0) {
-               start = buf_off + len;
-               end = info->params.buf_size;
-       } else {
-               start = buf_off;
-               end = buf_off + len;
-       }
+       unsigned long long per_sec = 1000000;
 
-       vr = kmalloc(sizeof(*vr), GFP_KERNEL);
-       if (!vr) {
-               pr_warn("dmatest: No memory to store verify result\n");
-               return dmatest_verify(NULL, bufs, start, end, counter, pattern,
-                                     is_srcbuf);
-       }
-
-       vr->pattern = pattern;
-       vr->is_srcbuf = is_srcbuf;
-
-       error_count = dmatest_verify(vr, bufs, start, end, counter, pattern,
-                                    is_srcbuf);
-       if (error_count) {
-               vr->error_count = error_count;
-               thread_result_add(info, r, DMATEST_ET_VERIFY_BUF, n, src_off,
-                                 dst_off, len, (unsigned long)vr);
-               return error_count;
-       }
-
-       kfree(vr);
-       return 0;
-}
-
-static void result_free(struct dmatest_info *info, const char *name)
-{
-       struct dmatest_result *r, *_r;
-
-       mutex_lock(&info->results_lock);
-       list_for_each_entry_safe(r, _r, &info->results, node) {
-               struct dmatest_thread_result *tr, *_tr;
-
-               if (name && strcmp(r->name, name))
-                       continue;
-
-               list_for_each_entry_safe(tr, _tr, &r->results, node) {
-                       if (tr->type == DMATEST_ET_VERIFY_BUF)
-                               kfree(tr->vr);
-                       list_del(&tr->node);
-                       kfree(tr);
-               }
+       if (runtime <= 0)
+               return 0;
 
-               kfree(r->name);
-               list_del(&r->node);
-               kfree(r);
+       /*
+        * do_div() takes a 32-bit divisor: drop precision until runtime fits.
+        * Divisor and multiplier are halved together so the quotient is
+        * unchanged; doubling the multiplier instead would inflate the
+        * result 4x for every iteration.
+        */
+       while (runtime > UINT_MAX) {
+               runtime >>= 1;
+               per_sec >>= 1;
        }
 
-       mutex_unlock(&info->results_lock);
+       per_sec *= val;
+       do_div(per_sec, runtime);
+       return per_sec;
 }
 
-static struct dmatest_result *result_init(struct dmatest_info *info,
-               const char *name)
+static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
 {
-       struct dmatest_result *r;
-
-       r = kzalloc(sizeof(*r), GFP_KERNEL);
-       if (r) {
-               r->name = kstrdup(name, GFP_KERNEL);
-               INIT_LIST_HEAD(&r->results);
-               mutex_lock(&info->results_lock);
-               list_add_tail(&r->node, &info->results);
-               mutex_unlock(&info->results_lock);
-       }
-       return r;
+       return dmatest_persec(runtime, len >> 10);
 }
 
 /*
@@ -525,7 +405,6 @@ static int dmatest_func(void *data)
        struct dmatest_params   *params;
        struct dma_chan         *chan;
        struct dma_device       *dev;
-       const char              *thread_name;
        unsigned int            src_off, dst_off, len;
        unsigned int            error_count;
        unsigned int            failed_tests = 0;
@@ -538,9 +417,10 @@ static int dmatest_func(void *data)
        int                     src_cnt;
        int                     dst_cnt;
        int                     i;
-       struct dmatest_result   *result;
+       ktime_t                 ktime;
+       s64                     runtime = 0;
+       unsigned long long      total_len = 0;
 
-       thread_name = current->comm;
        set_freezable();
 
        ret = -ENOMEM;
@@ -570,10 +450,6 @@ static int dmatest_func(void *data)
        } else
                goto err_thread_type;
 
-       result = result_init(info, thread_name);
-       if (!result)
-               goto err_srcs;
-
        thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL);
        if (!thread->srcs)
                goto err_srcs;
@@ -597,17 +473,17 @@ static int dmatest_func(void *data)
        set_user_nice(current, 10);
 
        /*
-        * src buffers are freed by the DMAEngine code with dma_unmap_single()
-        * dst buffers are freed by ourselves below
+        * src and dst buffers are freed by ourselves below
         */
-       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT
-             | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE;
+       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
+       ktime = ktime_get();
        while (!kthread_should_stop()
               && !(params->iterations && total_tests >= params->iterations)) {
                struct dma_async_tx_descriptor *tx = NULL;
-               dma_addr_t dma_srcs[src_cnt];
-               dma_addr_t dma_dsts[dst_cnt];
+               struct dmaengine_unmap_data *um;
+               dma_addr_t srcs[src_cnt];
+               dma_addr_t *dsts;
                u8 align = 0;
 
                total_tests++;
@@ -626,81 +502,103 @@ static int dmatest_func(void *data)
                        break;
                }
 
-               len = dmatest_random() % params->buf_size + 1;
+               if (params->noverify) {
+                       len = params->buf_size;
+                       src_off = 0;
+                       dst_off = 0;
+               } else {
+                       len = dmatest_random() % params->buf_size + 1;
+                       len = (len >> align) << align;
+                       if (!len)
+                               len = 1 << align;
+                       src_off = dmatest_random() % (params->buf_size - len + 1);
+                       dst_off = dmatest_random() % (params->buf_size - len + 1);
+
+                       src_off = (src_off >> align) << align;
+                       dst_off = (dst_off >> align) << align;
+
+                       dmatest_init_srcs(thread->srcs, src_off, len,
+                                         params->buf_size);
+                       dmatest_init_dsts(thread->dsts, dst_off, len,
+                                         params->buf_size);
+               }
+
                len = (len >> align) << align;
                if (!len)
                        len = 1 << align;
-               src_off = dmatest_random() % (params->buf_size - len + 1);
-               dst_off = dmatest_random() % (params->buf_size - len + 1);
+               total_len += len;
 
-               src_off = (src_off >> align) << align;
-               dst_off = (dst_off >> align) << align;
-
-               dmatest_init_srcs(thread->srcs, src_off, len, params->buf_size);
-               dmatest_init_dsts(thread->dsts, dst_off, len, params->buf_size);
+               um = dmaengine_get_unmap_data(dev->dev, src_cnt+dst_cnt,
+                                             GFP_KERNEL);
+               if (!um) {
+                       failed_tests++;
+                       result("unmap data NULL", total_tests,
+                              src_off, dst_off, len, ret);
+                       continue;
+               }
 
+               um->len = params->buf_size;
                for (i = 0; i < src_cnt; i++) {
-                       u8 *buf = thread->srcs[i] + src_off;
-
-                       dma_srcs[i] = dma_map_single(dev->dev, buf, len,
-                                                    DMA_TO_DEVICE);
-                       ret = dma_mapping_error(dev->dev, dma_srcs[i]);
+                       unsigned long buf = (unsigned long) thread->srcs[i];
+                       struct page *pg = virt_to_page(buf);
+                       unsigned pg_off = buf & ~PAGE_MASK;
+
+                       um->addr[i] = dma_map_page(dev->dev, pg, pg_off,
+                                                  um->len, DMA_TO_DEVICE);
+                       srcs[i] = um->addr[i] + src_off;
+                       ret = dma_mapping_error(dev->dev, um->addr[i]);
                        if (ret) {
-                               unmap_src(dev->dev, dma_srcs, len, i);
-                               thread_result_add(info, result,
-                                                 DMATEST_ET_MAP_SRC,
-                                                 total_tests, src_off, dst_off,
-                                                 len, ret);
+                               dmaengine_unmap_put(um);
+                               result("src mapping error", total_tests,
+                                      src_off, dst_off, len, ret);
                                failed_tests++;
                                continue;
                        }
+                       um->to_cnt++;
                }
                /* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
+               dsts = &um->addr[src_cnt];
                for (i = 0; i < dst_cnt; i++) {
-                       dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
-                                                    params->buf_size,
-                                                    DMA_BIDIRECTIONAL);
-                       ret = dma_mapping_error(dev->dev, dma_dsts[i]);
+                       unsigned long buf = (unsigned long) thread->dsts[i];
+                       struct page *pg = virt_to_page(buf);
+                       unsigned pg_off = buf & ~PAGE_MASK;
+
+                       dsts[i] = dma_map_page(dev->dev, pg, pg_off, um->len,
+                                              DMA_BIDIRECTIONAL);
+                       ret = dma_mapping_error(dev->dev, dsts[i]);
                        if (ret) {
-                               unmap_src(dev->dev, dma_srcs, len, src_cnt);
-                               unmap_dst(dev->dev, dma_dsts, params->buf_size,
-                                         i);
-                               thread_result_add(info, result,
-                                                 DMATEST_ET_MAP_DST,
-                                                 total_tests, src_off, dst_off,
-                                                 len, ret);
+                               dmaengine_unmap_put(um);
+                               result("dst mapping error", total_tests,
+                                      src_off, dst_off, len, ret);
                                failed_tests++;
                                continue;
                        }
+                       um->bidi_cnt++;
                }
 
                if (thread->type == DMA_MEMCPY)
                        tx = dev->device_prep_dma_memcpy(chan,
-                                                        dma_dsts[0] + dst_off,
-                                                        dma_srcs[0], len,
-                                                        flags);
+                                                        dsts[0] + dst_off,
+                                                        srcs[0], len, flags);
                else if (thread->type == DMA_XOR)
                        tx = dev->device_prep_dma_xor(chan,
-                                                     dma_dsts[0] + dst_off,
-                                                     dma_srcs, src_cnt,
+                                                     dsts[0] + dst_off,
+                                                     srcs, src_cnt,
                                                      len, flags);
                else if (thread->type == DMA_PQ) {
                        dma_addr_t dma_pq[dst_cnt];
 
                        for (i = 0; i < dst_cnt; i++)
-                               dma_pq[i] = dma_dsts[i] + dst_off;
-                       tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+                               dma_pq[i] = dsts[i] + dst_off;
+                       tx = dev->device_prep_dma_pq(chan, dma_pq, srcs,
                                                     src_cnt, pq_coefs,
                                                     len, flags);
                }
 
                if (!tx) {
-                       unmap_src(dev->dev, dma_srcs, len, src_cnt);
-                       unmap_dst(dev->dev, dma_dsts, params->buf_size,
-                                 dst_cnt);
-                       thread_result_add(info, result, DMATEST_ET_PREP,
-                                         total_tests, src_off, dst_off,
-                                         len, 0);
+                       dmaengine_unmap_put(um);
+                       result("prep error", total_tests, src_off,
+                              dst_off, len, ret);
                        msleep(100);
                        failed_tests++;
                        continue;
@@ -712,9 +610,9 @@ static int dmatest_func(void *data)
                cookie = tx->tx_submit(tx);
 
                if (dma_submit_error(cookie)) {
-                       thread_result_add(info, result, DMATEST_ET_SUBMIT,
-                                         total_tests, src_off, dst_off,
-                                         len, cookie);
+                       dmaengine_unmap_put(um);
+                       result("submit error", total_tests, src_off,
+                              dst_off, len, ret);
                        msleep(100);
                        failed_tests++;
                        continue;
@@ -735,59 +633,59 @@ static int dmatest_func(void *data)
                         * free it this time?" dancing.  For now, just
                         * leave it dangling.
                         */
-                       thread_result_add(info, result, DMATEST_ET_TIMEOUT,
-                                         total_tests, src_off, dst_off,
-                                         len, 0);
+                       dmaengine_unmap_put(um);
+                       result("test timed out", total_tests, src_off, dst_off,
+                              len, 0);
                        failed_tests++;
                        continue;
-               } else if (status != DMA_SUCCESS) {
-                       enum dmatest_error_type type = (status == DMA_ERROR) ?
-                               DMATEST_ET_DMA_ERROR : DMATEST_ET_DMA_IN_PROGRESS;
-                       thread_result_add(info, result, type,
-                                         total_tests, src_off, dst_off,
-                                         len, status);
+               } else if (status != DMA_COMPLETE) {
+                       dmaengine_unmap_put(um);
+                       result(status == DMA_ERROR ?
+                              "completion error status" :
+                              "completion busy status", total_tests, src_off,
+                              dst_off, len, ret);
                        failed_tests++;
                        continue;
                }
 
-               /* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
-               unmap_dst(dev->dev, dma_dsts, params->buf_size, dst_cnt);
+               dmaengine_unmap_put(um);
 
-               error_count = 0;
+               if (params->noverify) {
+                       verbose_result("test passed", total_tests, src_off,
+                                      dst_off, len, 0);
+                       continue;
+               }
 
-               pr_debug("%s: verifying source buffer...\n", thread_name);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->srcs, -1,
+               pr_debug("%s: verifying source buffer...\n", current->comm);
+               error_count = dmatest_verify(thread->srcs, 0, src_off,
                                0, PATTERN_SRC, true);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->srcs, 0,
-                               src_off, PATTERN_SRC | PATTERN_COPY, true);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->srcs, 1,
-                               src_off + len, PATTERN_SRC, true);
-
-               pr_debug("%s: verifying dest buffer...\n", thread_name);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->dsts, -1,
+               error_count += dmatest_verify(thread->srcs, src_off,
+                               src_off + len, src_off,
+                               PATTERN_SRC | PATTERN_COPY, true);
+               error_count += dmatest_verify(thread->srcs, src_off + len,
+                               params->buf_size, src_off + len,
+                               PATTERN_SRC, true);
+
+               pr_debug("%s: verifying dest buffer...\n", current->comm);
+               error_count += dmatest_verify(thread->dsts, 0, dst_off,
                                0, PATTERN_DST, false);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->dsts, 0,
-                               src_off, PATTERN_SRC | PATTERN_COPY, false);
-               error_count += verify_result_add(info, result, total_tests,
-                               src_off, dst_off, len, thread->dsts, 1,
-                               dst_off + len, PATTERN_DST, false);
+               error_count += dmatest_verify(thread->dsts, dst_off,
+                               dst_off + len, src_off,
+                               PATTERN_SRC | PATTERN_COPY, false);
+               error_count += dmatest_verify(thread->dsts, dst_off + len,
+                               params->buf_size, dst_off + len,
+                               PATTERN_DST, false);
 
                if (error_count) {
-                       thread_result_add(info, result, DMATEST_ET_VERIFY,
-                                         total_tests, src_off, dst_off,
-                                         len, error_count);
+                       result("data error", total_tests, src_off, dst_off,
+                              len, error_count);
                        failed_tests++;
                } else {
-                       thread_result_add(info, result, DMATEST_ET_OK,
-                                         total_tests, src_off, dst_off,
-                                         len, 0);
+                       verbose_result("test passed", total_tests, src_off,
+                                      dst_off, len, 0);
                }
        }
+       runtime = ktime_us_delta(ktime_get(), ktime);
 
        ret = 0;
        for (i = 0; thread->dsts[i]; i++)
@@ -802,20 +700,17 @@ err_srcbuf:
 err_srcs:
        kfree(pq_coefs);
 err_thread_type:
-       pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
-                       thread_name, total_tests, failed_tests, ret);
+       pr_info("%s: summary %u tests, %u failures %llu iops %llu KB/s (%d)\n",
+               current->comm, total_tests, failed_tests,
+               dmatest_persec(runtime, total_tests),
+               dmatest_KBs(runtime, total_len), ret);
 
        /* terminate all transfers on specified channels */
        if (ret)
                dmaengine_terminate_all(chan);
 
        thread->done = true;
-
-       if (params->iterations > 0)
-               while (!kthread_should_stop()) {
-                       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
-                       interruptible_sleep_on(&wait_dmatest_exit);
-               }
+       wake_up(&thread_wait);
 
        return ret;
 }
@@ -828,9 +723,10 @@ static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
 
        list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
                ret = kthread_stop(thread->task);
-               pr_debug("dmatest: thread %s exited with status %d\n",
-                               thread->task->comm, ret);
+               pr_debug("thread %s exited with status %d\n",
+                        thread->task->comm, ret);
                list_del(&thread->node);
+               put_task_struct(thread->task);
                kfree(thread);
        }
 
@@ -861,27 +757,27 @@ static int dmatest_add_threads(struct dmatest_info *info,
        for (i = 0; i < params->threads_per_chan; i++) {
                thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
                if (!thread) {
-                       pr_warning("dmatest: No memory for %s-%s%u\n",
-                                  dma_chan_name(chan), op, i);
-
+                       pr_warn("No memory for %s-%s%u\n",
+                               dma_chan_name(chan), op, i);
                        break;
                }
                thread->info = info;
                thread->chan = dtc->chan;
                thread->type = type;
                smp_wmb();
-               thread->task = kthread_run(dmatest_func, thread, "%s-%s%u",
+               thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
                                dma_chan_name(chan), op, i);
                if (IS_ERR(thread->task)) {
-                       pr_warning("dmatest: Failed to run thread %s-%s%u\n",
-                                       dma_chan_name(chan), op, i);
+                       pr_warn("Failed to create thread %s-%s%u\n",
+                               dma_chan_name(chan), op, i);
                        kfree(thread);
                        break;
                }
 
                /* srcbuf and dstbuf are allocated by the thread itself */
-
+               get_task_struct(thread->task);
                list_add_tail(&thread->node, &dtc->threads);
+               wake_up_process(thread->task);
        }
 
        return i;
@@ -897,7 +793,7 @@ static int dmatest_add_channel(struct dmatest_info *info,
 
        dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
        if (!dtc) {
-               pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
+               pr_warn("No memory for %s\n", dma_chan_name(chan));
                return -ENOMEM;
        }
 
@@ -917,7 +813,7 @@ static int dmatest_add_channel(struct dmatest_info *info,
                thread_count += cnt > 0 ? cnt : 0;
        }
 
-       pr_info("dmatest: Started %u threads using %s\n",
+       pr_info("Started %u threads using %s\n",
                thread_count, dma_chan_name(chan));
 
        list_add_tail(&dtc->node, &info->channels);
@@ -937,20 +833,20 @@ static bool filter(struct dma_chan *chan, void *param)
                return true;
 }
 
-static int __run_threaded_test(struct dmatest_info *info)
+static void request_channels(struct dmatest_info *info,
+                            enum dma_transaction_type type)
 {
        dma_cap_mask_t mask;
-       struct dma_chan *chan;
-       struct dmatest_params *params = &info->params;
-       int err = 0;
 
        dma_cap_zero(mask);
-       dma_cap_set(DMA_MEMCPY, mask);
+       dma_cap_set(type, mask);
        for (;;) {
+               struct dmatest_params *params = &info->params;
+               struct dma_chan *chan;
+
                chan = dma_request_channel(mask, filter, params);
                if (chan) {
-                       err = dmatest_add_channel(info, chan);
-                       if (err) {
+                       if (dmatest_add_channel(info, chan)) {
                                dma_release_channel(chan);
                                break; /* add_channel failed, punt */
                        }
@@ -960,22 +856,30 @@ static int __run_threaded_test(struct dmatest_info *info)
                    info->nr_channels >= params->max_channels)
                        break; /* we have all we need */
        }
-       return err;
 }
 
-#ifndef MODULE
-static int run_threaded_test(struct dmatest_info *info)
+static void run_threaded_test(struct dmatest_info *info)
 {
-       int ret;
+       struct dmatest_params *params = &info->params;
 
-       mutex_lock(&info->lock);
-       ret = __run_threaded_test(info);
-       mutex_unlock(&info->lock);
-       return ret;
+       /* Copy test parameters */
+       params->buf_size = test_buf_size;
+       strlcpy(params->channel, strim(test_channel), sizeof(params->channel));
+       strlcpy(params->device, strim(test_device), sizeof(params->device));
+       params->threads_per_chan = threads_per_chan;
+       params->max_channels = max_channels;
+       params->iterations = iterations;
+       params->xor_sources = xor_sources;
+       params->pq_sources = pq_sources;
+       params->timeout = timeout;
+       params->noverify = noverify;
+
+       request_channels(info, DMA_MEMCPY);
+       request_channels(info, DMA_XOR);
+       request_channels(info, DMA_PQ);
 }
-#endif
 
-static void __stop_threaded_test(struct dmatest_info *info)
+static void stop_threaded_test(struct dmatest_info *info)
 {
        struct dmatest_chan *dtc, *_dtc;
        struct dma_chan *chan;
@@ -984,203 +888,86 @@ static void __stop_threaded_test(struct dmatest_info *info)
                list_del(&dtc->node);
                chan = dtc->chan;
                dmatest_cleanup_channel(dtc);
-               pr_debug("dmatest: dropped channel %s\n", dma_chan_name(chan));
+               pr_debug("dropped channel %s\n", dma_chan_name(chan));
                dma_release_channel(chan);
        }
 
        info->nr_channels = 0;
 }
 
-static void stop_threaded_test(struct dmatest_info *info)
+static void restart_threaded_test(struct dmatest_info *info, bool run)
 {
-       mutex_lock(&info->lock);
-       __stop_threaded_test(info);
-       mutex_unlock(&info->lock);
-}
-
-static int __restart_threaded_test(struct dmatest_info *info, bool run)
-{
-       struct dmatest_params *params = &info->params;
+       /* we might be called early to set run=, defer running until all
+        * parameters have been evaluated
+        */
+       if (!info->did_init)
+               return;
 
        /* Stop any running test first */
-       __stop_threaded_test(info);
-
-       if (run == false)
-               return 0;
-
-       /* Clear results from previous run */
-       result_free(info, NULL);
-
-       /* Copy test parameters */
-       params->buf_size = test_buf_size;
-       strlcpy(params->channel, strim(test_channel), sizeof(params->channel));
-       strlcpy(params->device, strim(test_device), sizeof(params->device));
-       params->threads_per_chan = threads_per_chan;
-       params->max_channels = max_channels;
-       params->iterations = iterations;
-       params->xor_sources = xor_sources;
-       params->pq_sources = pq_sources;
-       params->timeout = timeout;
+       stop_threaded_test(info);
 
        /* Run test with new parameters */
-       return __run_threaded_test(info);
-}
-
-static bool __is_threaded_test_run(struct dmatest_info *info)
-{
-       struct dmatest_chan *dtc;
-
-       list_for_each_entry(dtc, &info->channels, node) {
-               struct dmatest_thread *thread;
-
-               list_for_each_entry(thread, &dtc->threads, node) {
-                       if (!thread->done)
-                               return true;
-               }
-       }
-
-       return false;
+       run_threaded_test(info);
 }
 
-static ssize_t dtf_read_run(struct file *file, char __user *user_buf,
-               size_t count, loff_t *ppos)
+static int dmatest_run_get(char *val, const struct kernel_param *kp)
 {
-       struct dmatest_info *info = file->private_data;
-       char buf[3];
+       struct dmatest_info *info = &test_info;
 
        mutex_lock(&info->lock);
-
-       if (__is_threaded_test_run(info)) {
-               buf[0] = 'Y';
+       if (is_threaded_test_run(info)) {
+               dmatest_run = true;
        } else {
-               __stop_threaded_test(info);
-               buf[0] = 'N';
+               stop_threaded_test(info);
+               dmatest_run = false;
        }
-
        mutex_unlock(&info->lock);
-       buf[1] = '\n';
-       buf[2] = 0x00;
-       return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
-}
-
-static ssize_t dtf_write_run(struct file *file, const char __user *user_buf,
-               size_t count, loff_t *ppos)
-{
-       struct dmatest_info *info = file->private_data;
-       char buf[16];
-       bool bv;
-       int ret = 0;
 
-       if (copy_from_user(buf, user_buf, min(count, (sizeof(buf) - 1))))
-               return -EFAULT;
-
-       if (strtobool(buf, &bv) == 0) {
-               mutex_lock(&info->lock);
-
-               if (__is_threaded_test_run(info))
-                       ret = -EBUSY;
-               else
-                       ret = __restart_threaded_test(info, bv);
-
-               mutex_unlock(&info->lock);
-       }
-
-       return ret ? ret : count;
+       return param_get_bool(val, kp);
 }
 
-static const struct file_operations dtf_run_fops = {
-       .read   = dtf_read_run,
-       .write  = dtf_write_run,
-       .open   = simple_open,
-       .llseek = default_llseek,
-};
-
-static int dtf_results_show(struct seq_file *sf, void *data)
+static int dmatest_run_set(const char *val, const struct kernel_param *kp)
 {
-       struct dmatest_info *info = sf->private;
-       struct dmatest_result *result;
-       struct dmatest_thread_result *tr;
-       unsigned int i;
+       struct dmatest_info *info = &test_info;
+       int ret;
 
-       mutex_lock(&info->results_lock);
-       list_for_each_entry(result, &info->results, node) {
-               list_for_each_entry(tr, &result->results, node) {
-                       seq_printf(sf, "%s\n",
-                               thread_result_get(result->name, tr));
-                       if (tr->type == DMATEST_ET_VERIFY_BUF) {
-                               for (i = 0; i < tr->vr->error_count; i++) {
-                                       seq_printf(sf, "\t%s\n",
-                                               verify_result_get_one(tr->vr, i));
-                               }
-                       }
-               }
+       mutex_lock(&info->lock);
+       ret = param_set_bool(val, kp);
+       if (ret) {
+               mutex_unlock(&info->lock);
+               return ret;
        }
 
-       mutex_unlock(&info->results_lock);
-       return 0;
-}
-
-static int dtf_results_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, dtf_results_show, inode->i_private);
-}
-
-static const struct file_operations dtf_results_fops = {
-       .open           = dtf_results_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static int dmatest_register_dbgfs(struct dmatest_info *info)
-{
-       struct dentry *d;
-
-       d = debugfs_create_dir("dmatest", NULL);
-       if (IS_ERR(d))
-               return PTR_ERR(d);
-       if (!d)
-               goto err_root;
+       if (is_threaded_test_run(info))
+               ret = -EBUSY;
+       else if (dmatest_run)
+               restart_threaded_test(info, dmatest_run);
 
-       info->root = d;
-
-       /* Run or stop threaded test */
-       debugfs_create_file("run", S_IWUSR | S_IRUGO, info->root, info,
-                           &dtf_run_fops);
-
-       /* Results of test in progress */
-       debugfs_create_file("results", S_IRUGO, info->root, info,
-                           &dtf_results_fops);
-
-       return 0;
+       mutex_unlock(&info->lock);
 
-err_root:
-       pr_err("dmatest: Failed to initialize debugfs\n");
-       return -ENOMEM;
+       return ret;
 }
 
 static int __init dmatest_init(void)
 {
        struct dmatest_info *info = &test_info;
-       int ret;
-
-       memset(info, 0, sizeof(*info));
+       struct dmatest_params *params = &info->params;
 
-       mutex_init(&info->lock);
-       INIT_LIST_HEAD(&info->channels);
+       if (dmatest_run) {
+               mutex_lock(&info->lock);
+               run_threaded_test(info);
+               mutex_unlock(&info->lock);
+       }
 
-       mutex_init(&info->results_lock);
-       INIT_LIST_HEAD(&info->results);
+       if (params->iterations && wait)
+               wait_event(thread_wait, !is_threaded_test_run(info));
 
-       ret = dmatest_register_dbgfs(info);
-       if (ret)
-               return ret;
+       /* module parameters are stable, inittime tests are started,
+        * let userspace take over 'run' control
+        */
+       info->did_init = true;
 
-#ifdef MODULE
        return 0;
-#else
-       return run_threaded_test(info);
-#endif
 }
 /* when compiled-in wait for drivers to load first */
 late_initcall(dmatest_init);
@@ -1189,9 +976,9 @@ static void __exit dmatest_exit(void)
 {
        struct dmatest_info *info = &test_info;
 
-       debugfs_remove_recursive(info->root);
+       mutex_lock(&info->lock);
        stop_threaded_test(info);
-       result_free(info, NULL);
+       mutex_unlock(&info->lock);
 }
 module_exit(dmatest_exit);
 
index 89eb89f222846e0ff5d20cfc5e14619fc05d6600..7516be4677cf7e778ba3bef2482cae9179ff41a3 100644 (file)
@@ -85,10 +85,6 @@ static struct device *chan2dev(struct dma_chan *chan)
 {
        return &chan->dev->device;
 }
-static struct device *chan2parent(struct dma_chan *chan)
-{
-       return chan->dev->device.parent;
-}
 
 static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
 {
@@ -311,26 +307,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
        list_splice_init(&desc->tx_list, &dwc->free_list);
        list_move(&desc->desc_node, &dwc->free_list);
 
-       if (!is_slave_direction(dwc->direction)) {
-               struct device *parent = chan2parent(&dwc->chan);
-               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                               dma_unmap_single(parent, desc->lli.dar,
-                                       desc->total_len, DMA_FROM_DEVICE);
-                       else
-                               dma_unmap_page(parent, desc->lli.dar,
-                                       desc->total_len, DMA_FROM_DEVICE);
-               }
-               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                               dma_unmap_single(parent, desc->lli.sar,
-                                       desc->total_len, DMA_TO_DEVICE);
-                       else
-                               dma_unmap_page(parent, desc->lli.sar,
-                                       desc->total_len, DMA_TO_DEVICE);
-               }
-       }
-
+       dma_descriptor_unmap(txd);
        spin_unlock_irqrestore(&dwc->lock, flags);
 
        if (callback)
@@ -1098,13 +1075,13 @@ dwc_tx_status(struct dma_chan *chan,
        enum dma_status         ret;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret != DMA_SUCCESS)
+       if (ret != DMA_COMPLETE)
                dma_set_residue(txstate, dwc_get_residue(dwc));
 
        if (dwc->paused && ret == DMA_IN_PROGRESS)
index bef8a368c8ddcac29f4bd7b4832bd3d0fa4cbdb6..2539ea0cbc6394f918fb849ffd3c6f6ca73f33a0 100644 (file)
 #define EDMA_CHANS     64
 #endif /* CONFIG_ARCH_DAVINCI_DA8XX */
 
-/* Max of 16 segments per channel to conserve PaRAM slots */
-#define MAX_NR_SG              16
+/*
+ * Max of 20 segments per channel to conserve PaRAM slots.
+ * Also note that MAX_NR_SG should be at least the number of periods
+ * that are required for ASoC, otherwise DMA prep calls will
+ * fail. Today davinci-pcm is the only user of this driver and
+ * requires at least 17 slots, so we set the default to 20.
+ */
+#define MAX_NR_SG              20
 #define EDMA_MAX_SLOTS         MAX_NR_SG
 #define EDMA_DESCRIPTORS       16
 
 struct edma_desc {
        struct virt_dma_desc            vdesc;
        struct list_head                node;
+       int                             cyclic;
        int                             absync;
        int                             pset_nr;
        int                             processed;
@@ -167,8 +174,13 @@ static void edma_execute(struct edma_chan *echan)
         * then setup a link to the dummy slot, this results in all future
         * events being absorbed and that's OK because we're done
         */
-       if (edesc->processed == edesc->pset_nr)
-               edma_link(echan->slot[nslots-1], echan->ecc->dummy_slot);
+       if (edesc->processed == edesc->pset_nr) {
+               if (edesc->cyclic)
+                       edma_link(echan->slot[nslots-1], echan->slot[1]);
+               else
+                       edma_link(echan->slot[nslots-1],
+                                 echan->ecc->dummy_slot);
+       }
 
        edma_resume(echan->ch_num);
 
@@ -250,6 +262,117 @@ static int edma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
        return ret;
 }
 
+/*
+ * A PaRAM set configuration abstraction used by other modes
+ * @chan: Channel whose PaRAM set we're configuring
+ * @pset: PaRAM set to initialize and setup.
+ * @src_addr: Source address of the DMA
+ * @dst_addr: Destination address of the DMA
+ * @burst: In units of dev_width, how much to send
+ * @dev_width: How much is the dev_width
+ * @dma_length: Total length of the DMA transfer
+ * @direction: Direction of the transfer
+ */
+static int edma_config_pset(struct dma_chan *chan, struct edmacc_param *pset,
+       dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
+       enum dma_slave_buswidth dev_width, unsigned int dma_length,
+       enum dma_transfer_direction direction)
+{
+       struct edma_chan *echan = to_edma_chan(chan);
+       struct device *dev = chan->device->dev;
+       int acnt, bcnt, ccnt, cidx;
+       int src_bidx, dst_bidx, src_cidx, dst_cidx;
+       int absync;
+
+       acnt = dev_width;
+       /*
+        * If the maxburst is equal to the fifo width, use
+        * A-synced transfers. This allows for large contiguous
+        * buffer transfers using only one PaRAM set.
+        */
+       if (burst == 1) {
+               /*
+                * For the A-sync case, bcnt and ccnt are the remainder
+                * and quotient respectively of the division of:
+                * (dma_length / acnt) by (SZ_64K - 1). This is so
+                * that in case bcnt overflows, we have ccnt to use.
+                * Note: bcntrld is used only in A-sync transfers, but it
+                * only applies for sg_dma_len(sg) >= SZ_64K.
+                * In this case, the approach adopted is: bcnt for the
+                * first frame will be the remainder below. Then for
+                * every successive frame, bcnt will be SZ_64K-1. This
+                * is assured as bcntrld = 0xffff at the end of the function.
+                */
+               absync = false;
+               ccnt = dma_length / acnt / (SZ_64K - 1);
+               bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);
+               /*
+                * If bcnt is non-zero, we have a remainder and hence an
+                * extra frame to transfer, so increment ccnt.
+                */
+               if (bcnt)
+                       ccnt++;
+               else
+                       bcnt = SZ_64K - 1;
+               cidx = acnt;
+       } else {
+               /*
+                * If maxburst is greater than the fifo address_width,
+                * use AB-synced transfers where A count is the fifo
+                * address_width and B count is the maxburst. In this
+                * case, we are limited to transfers of C count frames
+                * of (address_width * maxburst) where C count is limited
+                * to SZ_64K-1. This places an upper bound on the length
+                * of an SG segment that can be handled.
+                */
+               absync = true;
+               bcnt = burst;
+               ccnt = dma_length / (acnt * bcnt);
+               if (ccnt > (SZ_64K - 1)) {
+                       dev_err(dev, "Exceeded max SG segment size\n");
+                       return -EINVAL;
+               }
+               cidx = acnt * bcnt;
+       }
+
+       if (direction == DMA_MEM_TO_DEV) {
+               src_bidx = acnt;
+               src_cidx = cidx;
+               dst_bidx = 0;
+               dst_cidx = 0;
+       } else if (direction == DMA_DEV_TO_MEM)  {
+               src_bidx = 0;
+               src_cidx = 0;
+               dst_bidx = acnt;
+               dst_cidx = cidx;
+       } else {
+               dev_err(dev, "%s: direction not implemented yet\n", __func__);
+               return -EINVAL;
+       }
+
+       pset->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+       /* Configure A or AB synchronized transfers */
+       if (absync)
+               pset->opt |= SYNCDIM;
+
+       pset->src = src_addr;
+       pset->dst = dst_addr;
+
+       pset->src_dst_bidx = (dst_bidx << 16) | src_bidx;
+       pset->src_dst_cidx = (dst_cidx << 16) | src_cidx;
+
+       pset->a_b_cnt = bcnt << 16 | acnt;
+       pset->ccnt = ccnt;
+       /*
+        * Only time when (bcntrld) auto reload is required is for
+        * A-sync case, and in this case, a requirement of reload value
+        * of SZ_64K-1 only is assured. 'link' is initially set to NULL
+        * and then later will be populated by edma_execute.
+        */
+       pset->link_bcntrld = 0xffffffff;
+       return absync;
+}
+
 static struct dma_async_tx_descriptor *edma_prep_slave_sg(
        struct dma_chan *chan, struct scatterlist *sgl,
        unsigned int sg_len, enum dma_transfer_direction direction,
@@ -258,23 +381,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
        struct edma_chan *echan = to_edma_chan(chan);
        struct device *dev = chan->device->dev;
        struct edma_desc *edesc;
-       dma_addr_t dev_addr;
+       dma_addr_t src_addr = 0, dst_addr = 0;
        enum dma_slave_buswidth dev_width;
        u32 burst;
        struct scatterlist *sg;
-       int acnt, bcnt, ccnt, src, dst, cidx;
-       int src_bidx, dst_bidx, src_cidx, dst_cidx;
-       int i, nslots;
+       int i, nslots, ret;
 
        if (unlikely(!echan || !sgl || !sg_len))
                return NULL;
 
        if (direction == DMA_DEV_TO_MEM) {
-               dev_addr = echan->cfg.src_addr;
+               src_addr = echan->cfg.src_addr;
                dev_width = echan->cfg.src_addr_width;
                burst = echan->cfg.src_maxburst;
        } else if (direction == DMA_MEM_TO_DEV) {
-               dev_addr = echan->cfg.dst_addr;
+               dst_addr = echan->cfg.dst_addr;
                dev_width = echan->cfg.dst_addr_width;
                burst = echan->cfg.dst_maxburst;
        } else {
@@ -307,7 +428,6 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
                        if (echan->slot[i] < 0) {
                                kfree(edesc);
                                dev_err(dev, "Failed to allocate slot\n");
-                               kfree(edesc);
                                return NULL;
                        }
                }
@@ -315,64 +435,21 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 
        /* Configure PaRAM sets for each SG */
        for_each_sg(sgl, sg, sg_len, i) {
-
-               acnt = dev_width;
-
-               /*
-                * If the maxburst is equal to the fifo width, use
-                * A-synced transfers. This allows for large contiguous
-                * buffer transfers using only one PaRAM set.
-                */
-               if (burst == 1) {
-                       edesc->absync = false;
-                       ccnt = sg_dma_len(sg) / acnt / (SZ_64K - 1);
-                       bcnt = sg_dma_len(sg) / acnt - ccnt * (SZ_64K - 1);
-                       if (bcnt)
-                               ccnt++;
-                       else
-                               bcnt = SZ_64K - 1;
-                       cidx = acnt;
-               /*
-                * If maxburst is greater than the fifo address_width,
-                * use AB-synced transfers where A count is the fifo
-                * address_width and B count is the maxburst. In this
-                * case, we are limited to transfers of C count frames
-                * of (address_width * maxburst) where C count is limited
-                * to SZ_64K-1. This places an upper bound on the length
-                * of an SG segment that can be handled.
-                */
-               } else {
-                       edesc->absync = true;
-                       bcnt = burst;
-                       ccnt = sg_dma_len(sg) / (acnt * bcnt);
-                       if (ccnt > (SZ_64K - 1)) {
-                               dev_err(dev, "Exceeded max SG segment size\n");
-                               kfree(edesc);
-                               return NULL;
-                       }
-                       cidx = acnt * bcnt;
+               /* Get address for each SG */
+               if (direction == DMA_DEV_TO_MEM)
+                       dst_addr = sg_dma_address(sg);
+               else
+                       src_addr = sg_dma_address(sg);
+
+               ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+                                      dst_addr, burst, dev_width,
+                                      sg_dma_len(sg), direction);
+               if (ret < 0) {
+                       kfree(edesc);
+                       return NULL;
                }
 
-               if (direction == DMA_MEM_TO_DEV) {
-                       src = sg_dma_address(sg);
-                       dst = dev_addr;
-                       src_bidx = acnt;
-                       src_cidx = cidx;
-                       dst_bidx = 0;
-                       dst_cidx = 0;
-               } else {
-                       src = dev_addr;
-                       dst = sg_dma_address(sg);
-                       src_bidx = 0;
-                       src_cidx = 0;
-                       dst_bidx = acnt;
-                       dst_cidx = cidx;
-               }
-
-               edesc->pset[i].opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
-               /* Configure A or AB synchronized transfers */
-               if (edesc->absync)
-                       edesc->pset[i].opt |= SYNCDIM;
+               edesc->absync = ret;
 
                /* If this is the last in a current SG set of transactions,
                   enable interrupts so that next set is processed */
@@ -382,17 +459,149 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
                /* If this is the last set, enable completion interrupt flag */
                if (i == sg_len - 1)
                        edesc->pset[i].opt |= TCINTEN;
+       }
 
-               edesc->pset[i].src = src;
-               edesc->pset[i].dst = dst;
+       return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
 
-               edesc->pset[i].src_dst_bidx = (dst_bidx << 16) | src_bidx;
-               edesc->pset[i].src_dst_cidx = (dst_cidx << 16) | src_cidx;
+/*
+ * Prepare a cyclic transfer over [buf_addr, buf_addr + buf_len), split into
+ * periods of period_len. One PaRAM set is configured per period, plus one
+ * trailing set that is a copy of the first. Returns NULL on a bad direction
+ * or undefined bus width, if buf_len is not a multiple of period_len, if
+ * more than MAX_NR_SG sets would be needed, or if an allocation fails; the
+ * descriptor is freed on every failure path after it has been allocated.
+ */
+static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
+       struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+       size_t period_len, enum dma_transfer_direction direction,
+       unsigned long tx_flags, void *context)
+{
+       struct edma_chan *echan = to_edma_chan(chan);
+       struct device *dev = chan->device->dev;
+       struct edma_desc *edesc;
+       dma_addr_t src_addr, dst_addr;
+       enum dma_slave_buswidth dev_width;
+       u32 burst;
+       int i, ret, nslots;
+
+       if (unlikely(!echan || !buf_len || !period_len))
+               return NULL;
+
+       if (direction == DMA_DEV_TO_MEM) {
+               src_addr = echan->cfg.src_addr;
+               dst_addr = buf_addr;
+               dev_width = echan->cfg.src_addr_width;
+               burst = echan->cfg.src_maxburst;
+       } else if (direction == DMA_MEM_TO_DEV) {
+               src_addr = buf_addr;
+               dst_addr = echan->cfg.dst_addr;
+               dev_width = echan->cfg.dst_addr_width;
+               burst = echan->cfg.dst_maxburst;
+       } else {
+               dev_err(dev, "%s: bad direction?\n", __func__);
+               return NULL;
+       }
+
+       if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+               dev_err(dev, "Undefined slave buswidth\n");
+               return NULL;
+       }
+
+       if (unlikely(buf_len % period_len)) {
+               dev_err(dev, "Buffer length should be a multiple of period length\n");
+               return NULL;
+       }
+
+       nslots = (buf_len / period_len) + 1;
+
+       /*
+        * Cyclic DMA users such as audio cannot tolerate delays introduced
+        * by cases where the number of periods is more than the maximum
+        * number of SGs the EDMA driver can handle at a time. For DMA types
+        * such as Slave SGs, such delays are tolerable and synchronized,
+        * but the synchronization is difficult to achieve with Cyclic and
+        * cannot be guaranteed, so we error out early.
+        */
+       if (nslots > MAX_NR_SG)
+               return NULL;
+
+       edesc = kzalloc(sizeof(*edesc) + nslots *
+               sizeof(edesc->pset[0]), GFP_ATOMIC);
+       if (!edesc) {
+               dev_dbg(dev, "Failed to allocate a descriptor\n");
+               return NULL;
+       }
+
+       edesc->cyclic = 1;
+       edesc->pset_nr = nslots;
+
+       dev_dbg(dev, "%s: nslots=%d\n", __func__, nslots);
+       dev_dbg(dev, "%s: period_len=%zu\n", __func__, period_len);
+       dev_dbg(dev, "%s: buf_len=%zu\n", __func__, buf_len);
+
+       for (i = 0; i < nslots; i++) {
+               /* Allocate a PaRAM slot, if needed */
+               if (echan->slot[i] < 0) {
+                       echan->slot[i] =
+                               edma_alloc_slot(EDMA_CTLR(echan->ch_num),
+                                               EDMA_SLOT_ANY);
+                       if (echan->slot[i] < 0) {
+                               kfree(edesc);
+                               dev_err(dev, "Failed to allocate slot\n");
+                               return NULL;
+                       }
+               }
+
+               if (i == nslots - 1) {
+                       memcpy(&edesc->pset[i], &edesc->pset[0],
+                              sizeof(edesc->pset[0]));
+                       break;
+               }
+
+               ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+                                      dst_addr, burst, dev_width, period_len,
+                                      direction);
+               if (ret < 0) {
+                       kfree(edesc);
+                       return NULL;
+               }
 
-               edesc->pset[i].a_b_cnt = bcnt << 16 | acnt;
-               edesc->pset[i].ccnt = ccnt;
-               edesc->pset[i].link_bcntrld = 0xffffffff;
+               if (direction == DMA_DEV_TO_MEM)
+                       dst_addr += period_len;
+               else
+                       src_addr += period_len;
 
+               dev_dbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
+               dev_dbg(dev,
+                       "\n pset[%d]:\n"
+                       "  chnum\t%d\n"
+                       "  slot\t%d\n"
+                       "  opt\t%08x\n"
+                       "  src\t%08x\n"
+                       "  dst\t%08x\n"
+                       "  abcnt\t%08x\n"
+                       "  ccnt\t%08x\n"
+                       "  bidx\t%08x\n"
+                       "  cidx\t%08x\n"
+                       "  lkrld\t%08x\n",
+                       i, echan->ch_num, echan->slot[i],
+                       edesc->pset[i].opt,
+                       edesc->pset[i].src,
+                       edesc->pset[i].dst,
+                       edesc->pset[i].a_b_cnt,
+                       edesc->pset[i].ccnt,
+                       edesc->pset[i].src_dst_bidx,
+                       edesc->pset[i].src_dst_cidx,
+                       edesc->pset[i].link_bcntrld);
+
+               edesc->absync = ret;
+
+               /*
+                * Enable interrupts for every period because callback
+                * has to be called for every period.
+                */
+               edesc->pset[i].opt |= TCINTEN;
        }
 
        return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
@@ -406,30 +604,34 @@ static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
        unsigned long flags;
        struct edmacc_param p;
 
-       /* Pause the channel */
-       edma_pause(echan->ch_num);
+       edesc = echan->edesc;
+
+       /* Pause the channel unless a cyclic descriptor is in flight */
+       if (!edesc || !edesc->cyclic)
+               edma_pause(echan->ch_num);
 
        switch (ch_status) {
-       case DMA_COMPLETE:
+       case EDMA_DMA_COMPLETE:
                spin_lock_irqsave(&echan->vchan.lock, flags);
 
-               edesc = echan->edesc;
                if (edesc) {
-                       if (edesc->processed == edesc->pset_nr) {
+                       if (edesc->cyclic) {
+                               vchan_cyclic_callback(&edesc->vdesc);
+                       } else if (edesc->processed == edesc->pset_nr) {
                                dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
                                edma_stop(echan->ch_num);
                                vchan_cookie_complete(&edesc->vdesc);
+                               edma_execute(echan);
                        } else {
                                dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
+                               edma_execute(echan);
                        }
-
-                       edma_execute(echan);
                }
 
                spin_unlock_irqrestore(&echan->vchan.lock, flags);
 
                break;
-       case DMA_CC_ERROR:
+       case EDMA_DMA_CC_ERROR:
                spin_lock_irqsave(&echan->vchan.lock, flags);
 
                edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
@@ -579,7 +781,7 @@ static enum dma_status edma_tx_status(struct dma_chan *chan,
        unsigned long flags;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS || !txstate)
+       if (ret == DMA_COMPLETE || !txstate)
                return ret;
 
        spin_lock_irqsave(&echan->vchan.lock, flags);
@@ -619,6 +821,7 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
                          struct device *dev)
 {
        dma->device_prep_slave_sg = edma_prep_slave_sg;
+       dma->device_prep_dma_cyclic = edma_prep_dma_cyclic;
        dma->device_alloc_chan_resources = edma_alloc_chan_resources;
        dma->device_free_chan_resources = edma_free_chan_resources;
        dma->device_issue_pending = edma_issue_pending;
index 591cd8c63abbcb081a4cd2ca264ed118f7f3d782..cb4bf682a70863e6253396eb717cc7e9ae86e453 100644 (file)
@@ -733,28 +733,6 @@ static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac)
        spin_unlock_irqrestore(&edmac->lock, flags);
 }
 
-static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc)
-{
-       struct device *dev = desc->txd.chan->device->dev;
-
-       if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       dma_unmap_single(dev, desc->src_addr, desc->size,
-                                        DMA_TO_DEVICE);
-               else
-                       dma_unmap_page(dev, desc->src_addr, desc->size,
-                                      DMA_TO_DEVICE);
-       }
-       if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       dma_unmap_single(dev, desc->dst_addr, desc->size,
-                                        DMA_FROM_DEVICE);
-               else
-                       dma_unmap_page(dev, desc->dst_addr, desc->size,
-                                      DMA_FROM_DEVICE);
-       }
-}
-
 static void ep93xx_dma_tasklet(unsigned long data)
 {
        struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data;
@@ -787,13 +765,7 @@ static void ep93xx_dma_tasklet(unsigned long data)
 
        /* Now we can release all the chained descriptors */
        list_for_each_entry_safe(desc, d, &list, node) {
-               /*
-                * For the memcpy channels the API requires us to unmap the
-                * buffers unless requested otherwise.
-                */
-               if (!edmac->chan.private)
-                       ep93xx_dma_unmap_buffers(desc);
-
+               dma_descriptor_unmap(&desc->txd);
                ep93xx_dma_desc_put(edmac, desc);
        }
 
index 61517dd0d0b73b7077be4acc5cf075792a00d86e..7086a16a55f2ed488573e475e615600c80be767e 100644 (file)
@@ -870,22 +870,7 @@ static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
        /* Run any dependencies */
        dma_run_dependencies(txd);
 
-       /* Unmap the dst buffer, if requested */
-       if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
-               else
-                       dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
-       }
-
-       /* Unmap the src buffer, if requested */
-       if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
-               else
-                       dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
-       }
-
+       dma_descriptor_unmap(txd);
 #ifdef FSL_DMA_LD_DEBUG
        chan_dbg(chan, "LD %p free\n", desc);
 #endif
@@ -1255,7 +1240,9 @@ static int fsl_dma_chan_probe(struct fsldma_device *fdev,
        WARN_ON(fdev->feature != chan->feature);
 
        chan->dev = fdev->dev;
-       chan->id = ((res.start - 0x100) & 0xfff) >> 7;
+       chan->id = (res.start & 0xfff) < 0x300 ?
+                  ((res.start - 0x100) & 0xfff) >> 7 :
+                  ((res.start - 0x200) & 0xfff) >> 7;
        if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
                dev_err(fdev->dev, "too many channels for device\n");
                err = -EINVAL;
@@ -1428,6 +1415,7 @@ static int fsldma_of_remove(struct platform_device *op)
 }
 
 static const struct of_device_id fsldma_of_ids[] = {
+       { .compatible = "fsl,elo3-dma", },
        { .compatible = "fsl,eloplus-dma", },
        { .compatible = "fsl,elo-dma", },
        {}
@@ -1449,7 +1437,7 @@ static struct platform_driver fsldma_of_driver = {
 
 static __init int fsldma_init(void)
 {
-       pr_info("Freescale Elo / Elo Plus DMA driver\n");
+       pr_info("Freescale Elo series DMA driver\n");
        return platform_driver_register(&fsldma_of_driver);
 }
 
@@ -1461,5 +1449,5 @@ static void __exit fsldma_exit(void)
 subsys_initcall(fsldma_init);
 module_exit(fsldma_exit);
 
-MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_DESCRIPTION("Freescale Elo series DMA driver");
 MODULE_LICENSE("GPL");
index f5c38791fc7466f5d683b2ee49e718d8c29d5ca6..1ffc24484d23cdb0edd1e6011605aff5c9a6eb07 100644 (file)
@@ -112,7 +112,7 @@ struct fsldma_chan_regs {
 };
 
 struct fsldma_chan;
-#define FSL_DMA_MAX_CHANS_PER_DEVICE 4
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 8
 
 struct fsldma_device {
        void __iomem *regs;     /* DGSR register base */
index 55852c02679143f453286f189e68fd777ea1afaa..6f9ac2022abd8b3d23c739bc7face1cf662a39f3 100644 (file)
@@ -572,9 +572,11 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
 
                imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel));
 
-               dev_dbg(imxdma->dev, "%s channel: %d dest=0x%08x src=0x%08x "
-                       "dma_length=%d\n", __func__, imxdmac->channel,
-                       d->dest, d->src, d->len);
+               dev_dbg(imxdma->dev,
+                       "%s channel: %d dest=0x%08llx src=0x%08llx dma_length=%zu\n",
+                       __func__, imxdmac->channel,
+                       (unsigned long long)d->dest,
+                       (unsigned long long)d->src, d->len);
 
                break;
        /* Cyclic transfer is the same as slave_sg with special sg configuration. */
@@ -586,20 +588,22 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
                        imx_dmav1_writel(imxdma, imxdmac->ccr_from_device,
                                         DMA_CCR(imxdmac->channel));
 
-                       dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
-                               "total length=%d dev_addr=0x%08x (dev2mem)\n",
-                               __func__, imxdmac->channel, d->sg, d->sgcount,
-                               d->len, imxdmac->per_address);
+                       dev_dbg(imxdma->dev,
+                               "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (dev2mem)\n",
+                               __func__, imxdmac->channel,
+                               d->sg, d->sgcount, d->len,
+                               (unsigned long long)imxdmac->per_address);
                } else if (d->direction == DMA_MEM_TO_DEV) {
                        imx_dmav1_writel(imxdma, imxdmac->per_address,
                                         DMA_DAR(imxdmac->channel));
                        imx_dmav1_writel(imxdma, imxdmac->ccr_to_device,
                                         DMA_CCR(imxdmac->channel));
 
-                       dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
-                               "total length=%d dev_addr=0x%08x (mem2dev)\n",
-                               __func__, imxdmac->channel, d->sg, d->sgcount,
-                               d->len, imxdmac->per_address);
+                       dev_dbg(imxdma->dev,
+                               "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (mem2dev)\n",
+                               __func__, imxdmac->channel,
+                               d->sg, d->sgcount, d->len,
+                               (unsigned long long)imxdmac->per_address);
                } else {
                        dev_err(imxdma->dev, "%s channel: %d bad dma mode\n",
                                __func__, imxdmac->channel);
@@ -771,7 +775,7 @@ static int imxdma_alloc_chan_resources(struct dma_chan *chan)
                desc->desc.tx_submit = imxdma_tx_submit;
                /* txd.flags will be overwritten in prep funcs */
                desc->desc.flags = DMA_CTRL_ACK;
-               desc->status = DMA_SUCCESS;
+               desc->status = DMA_COMPLETE;
 
                list_add_tail(&desc->node, &imxdmac->ld_free);
                imxdmac->descs_allocated++;
@@ -870,7 +874,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
        int i;
        unsigned int periods = buf_len / period_len;
 
-       dev_dbg(imxdma->dev, "%s channel: %d buf_len=%d period_len=%d\n",
+       dev_dbg(imxdma->dev, "%s channel: %d buf_len=%zu period_len=%zu\n",
                        __func__, imxdmac->channel, buf_len, period_len);
 
        if (list_empty(&imxdmac->ld_free) ||
@@ -926,8 +930,9 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy(
        struct imxdma_engine *imxdma = imxdmac->imxdma;
        struct imxdma_desc *desc;
 
-       dev_dbg(imxdma->dev, "%s channel: %d src=0x%x dst=0x%x len=%d\n",
-                       __func__, imxdmac->channel, src, dest, len);
+       dev_dbg(imxdma->dev, "%s channel: %d src=0x%llx dst=0x%llx len=%zu\n",
+               __func__, imxdmac->channel, (unsigned long long)src,
+               (unsigned long long)dest, len);
 
        if (list_empty(&imxdmac->ld_free) ||
            imxdma_chan_is_doing_cyclic(imxdmac))
@@ -956,9 +961,10 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved(
        struct imxdma_engine *imxdma = imxdmac->imxdma;
        struct imxdma_desc *desc;
 
-       dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%x dst_start=0x%x\n"
-               "   src_sgl=%s dst_sgl=%s numf=%d frame_size=%d\n", __func__,
-               imxdmac->channel, xt->src_start, xt->dst_start,
+       dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%llx dst_start=0x%llx\n"
+               "   src_sgl=%s dst_sgl=%s numf=%zu frame_size=%zu\n", __func__,
+               imxdmac->channel, (unsigned long long)xt->src_start,
+               (unsigned long long) xt->dst_start,
                xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
                xt->numf, xt->frame_size);
 
index c1fd504cae282491969886b0d0336784229e422d..c75679d420286c522679cdb9c344549c97e7a0c4 100644 (file)
@@ -638,7 +638,7 @@ static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
        if (error)
                sdmac->status = DMA_ERROR;
        else
-               sdmac->status = DMA_SUCCESS;
+               sdmac->status = DMA_COMPLETE;
 
        dma_cookie_complete(&sdmac->desc);
        if (sdmac->desc.callback)
@@ -1089,8 +1089,8 @@ static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
                        param &= ~BD_CONT;
                }
 
-               dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
-                               i, count, sg->dma_address,
+               dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+                               i, count, (u64)sg->dma_address,
                                param & BD_WRAP ? "wrap" : "",
                                param & BD_INTR ? " intr" : "");
 
@@ -1163,8 +1163,8 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
                if (i + 1 == num_periods)
                        param |= BD_WRAP;
 
-               dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
-                               i, period_len, dma_addr,
+               dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+                               i, period_len, (u64)dma_addr,
                                param & BD_WRAP ? "wrap" : "",
                                param & BD_INTR ? " intr" : "");
 
index a975ebebea8aaf9b8950497eefdcaf3793d930d2..1aab8130efa1c75ae51906938447c91d12a66ddc 100644 (file)
@@ -309,7 +309,7 @@ static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
                callback_txd(param_txd);
        }
        if (midc->raw_tfr) {
-               desc->status = DMA_SUCCESS;
+               desc->status = DMA_COMPLETE;
                if (desc->lli != NULL) {
                        pci_pool_free(desc->lli_pool, desc->lli,
                                                desc->lli_phys);
@@ -481,7 +481,7 @@ static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
        enum dma_status ret;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret != DMA_SUCCESS) {
+       if (ret != DMA_COMPLETE) {
                spin_lock_bh(&midc->lock);
                midc_scan_descriptors(to_middma_device(chan->device), midc);
                spin_unlock_bh(&midc->lock);
index 5ff6fc1819dc6a2e90c035956b23e23c56f9bb5d..1a49c777607c50d313482f3ead21c19572a1cf8d 100644 (file)
@@ -531,21 +531,6 @@ static void ioat1_cleanup_event(unsigned long data)
        writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
 }
 
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
-                   size_t len, struct ioat_dma_descriptor *hw)
-{
-       struct pci_dev *pdev = chan->device->pdev;
-       size_t offset = len - hw->size;
-
-       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
-               ioat_unmap(pdev, hw->dst_addr - offset, len,
-                          PCI_DMA_FROMDEVICE, flags, 1);
-
-       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
-               ioat_unmap(pdev, hw->src_addr - offset, len,
-                          PCI_DMA_TODEVICE, flags, 0);
-}
-
 dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan)
 {
        dma_addr_t phys_complete;
@@ -602,7 +587,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete)
                dump_desc_dbg(ioat, desc);
                if (tx->cookie) {
                        dma_cookie_complete(tx);
-                       ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+                       dma_descriptor_unmap(tx);
                        ioat->active -= desc->hw->tx_cnt;
                        if (tx->callback) {
                                tx->callback(tx->callback_param);
@@ -733,7 +718,7 @@ ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
        enum dma_status ret;
 
        ret = dma_cookie_status(c, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        device->cleanup_fn((unsigned long) c);
@@ -833,8 +818,7 @@ int ioat_dma_self_test(struct ioatdma_device *device)
 
        dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
        dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
-       flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP |
-               DMA_PREP_INTERRUPT;
+       flags = DMA_PREP_INTERRUPT;
        tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
                                                   IOAT_TEST_SIZE, flags);
        if (!tx) {
@@ -859,7 +843,7 @@ int ioat_dma_self_test(struct ioatdma_device *device)
 
        if (tmo == 0 ||
            dma->device_tx_status(dma_chan, cookie, NULL)
-                                       != DMA_SUCCESS) {
+                                       != DMA_COMPLETE) {
                dev_err(dev, "Self-test copy timed out, disabling\n");
                err = -ENODEV;
                goto unmap_dma;
@@ -885,8 +869,7 @@ static char ioat_interrupt_style[32] = "msix";
 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
                    sizeof(ioat_interrupt_style), 0644);
 MODULE_PARM_DESC(ioat_interrupt_style,
-                "set ioat interrupt style: msix (default), "
-                "msix-single-vector, msi, intx)");
+                "set ioat interrupt style: msix (default), msi, intx");
 
 /**
  * ioat_dma_setup_interrupts - setup interrupt handler
@@ -904,8 +887,6 @@ int ioat_dma_setup_interrupts(struct ioatdma_device *device)
 
        if (!strcmp(ioat_interrupt_style, "msix"))
                goto msix;
-       if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
-               goto msix_single_vector;
        if (!strcmp(ioat_interrupt_style, "msi"))
                goto msi;
        if (!strcmp(ioat_interrupt_style, "intx"))
@@ -920,10 +901,8 @@ msix:
                device->msix_entries[i].entry = i;
 
        err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
-       if (err < 0)
+       if (err)
                goto msi;
-       if (err > 0)
-               goto msix_single_vector;
 
        for (i = 0; i < msixcnt; i++) {
                msix = &device->msix_entries[i];
@@ -937,29 +916,13 @@ msix:
                                chan = ioat_chan_by_index(device, j);
                                devm_free_irq(dev, msix->vector, chan);
                        }
-                       goto msix_single_vector;
+                       goto msi;
                }
        }
        intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
        device->irq_mode = IOAT_MSIX;
        goto done;
 
-msix_single_vector:
-       msix = &device->msix_entries[0];
-       msix->entry = 0;
-       err = pci_enable_msix(pdev, device->msix_entries, 1);
-       if (err)
-               goto msi;
-
-       err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
-                              "ioat-msix", device);
-       if (err) {
-               pci_disable_msix(pdev);
-               goto msi;
-       }
-       device->irq_mode = IOAT_MSIX_SINGLE;
-       goto done;
-
 msi:
        err = pci_enable_msi(pdev);
        if (err)
@@ -971,7 +934,7 @@ msi:
                pci_disable_msi(pdev);
                goto intx;
        }
-       device->irq_mode = IOAT_MSIX;
+       device->irq_mode = IOAT_MSI;
        goto done;
 
 intx:
index 54fb7b9ff9aaa4afb88c823b3a129a22440a9320..11fb877ddca9a9b0888d23952dea8fb48245b617 100644 (file)
@@ -52,7 +52,6 @@
 enum ioat_irq_mode {
        IOAT_NOIRQ = 0,
        IOAT_MSIX,
-       IOAT_MSIX_SINGLE,
        IOAT_MSI,
        IOAT_INTX
 };
@@ -83,7 +82,6 @@ struct ioatdma_device {
        struct pci_pool *completion_pool;
 #define MAX_SED_POOLS  5
        struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
-       struct kmem_cache *sed_pool;
        struct dma_device common;
        u8 version;
        struct msix_entry msix_entries[4];
@@ -342,16 +340,6 @@ static inline bool is_ioat_bug(unsigned long err)
        return !!err;
 }
 
-static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
-                             int direction, enum dma_ctrl_flags flags, bool dst)
-{
-       if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
-           (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
-               pci_unmap_single(pdev, addr, len, direction);
-       else
-               pci_unmap_page(pdev, addr, len, direction);
-}
-
 int ioat_probe(struct ioatdma_device *device);
 int ioat_register(struct ioatdma_device *device);
 int ioat1_dma_probe(struct ioatdma_device *dev, int dca);
@@ -363,8 +351,6 @@ void ioat_init_channel(struct ioatdma_device *device,
                       struct ioat_chan_common *chan, int idx);
 enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
                                   struct dma_tx_state *txstate);
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
-                   size_t len, struct ioat_dma_descriptor *hw);
 bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
                           dma_addr_t *phys_complete);
 void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
index b925e1b1d139bddbc6edf86f34d4b943ebfb086c..5d3affe7e976165ec5576ac8c6551dd438af7786 100644 (file)
@@ -148,7 +148,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
                tx = &desc->txd;
                dump_desc_dbg(ioat, desc);
                if (tx->cookie) {
-                       ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+                       dma_descriptor_unmap(tx);
                        dma_cookie_complete(tx);
                        if (tx->callback) {
                                tx->callback(tx->callback_param);
index 212d584fe4272a37d947cc72b95e836d801b77b1..470292767e68e81e390e1b9065954fd2546e04e6 100644 (file)
@@ -157,7 +157,6 @@ static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
 
 int ioat2_dma_probe(struct ioatdma_device *dev, int dca);
 int ioat3_dma_probe(struct ioatdma_device *dev, int dca);
-void ioat3_dma_remove(struct ioatdma_device *dev);
 struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);
index d8ececaf1b57082cc5709aca68714542657b5566..820817e97e626a498561a9e5f0f3a61f22ffc9fa 100644 (file)
@@ -67,6 +67,8 @@
 #include "dma.h"
 #include "dma_v2.h"
 
+extern struct kmem_cache *ioat3_sed_cache;
+
 /* ioat hardware assumes at least two sources for raid operations */
 #define src_cnt_to_sw(x) ((x) + 2)
 #define src_cnt_to_hw(x) ((x) - 2)
@@ -87,22 +89,8 @@ static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
 static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
                                        0, 1, 2, 3, 4, 5, 6 };
 
-/*
- * technically sources 1 and 2 do not require SED, but the op will have
- * at least 9 descriptors so that's irrelevant.
- */
-static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                                     1, 1, 1, 1, 1, 1, 1 };
-
 static void ioat3_eh(struct ioat2_dma_chan *ioat);
 
-static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
-{
-       struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
-
-       return raw->field[xor_idx_to_field[idx]];
-}
-
 static void xor_set_src(struct ioat_raw_descriptor *descs[2],
                        dma_addr_t addr, u32 offset, int idx)
 {
@@ -135,12 +123,6 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2],
        pq->coef[idx] = coef;
 }
 
-static int sed_get_pq16_pool_idx(int src_cnt)
-{
-
-       return pq16_idx_to_sed[src_cnt];
-}
-
 static bool is_jf_ioat(struct pci_dev *pdev)
 {
        switch (pdev->device) {
@@ -272,7 +254,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
        struct ioat_sed_ent *sed;
        gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
 
-       sed = kmem_cache_alloc(device->sed_pool, flags);
+       sed = kmem_cache_alloc(ioat3_sed_cache, flags);
        if (!sed)
                return NULL;
 
@@ -280,7 +262,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
        sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool],
                                 flags, &sed->dma);
        if (!sed->hw) {
-               kmem_cache_free(device->sed_pool, sed);
+               kmem_cache_free(ioat3_sed_cache, sed);
                return NULL;
        }
 
@@ -293,165 +275,7 @@ static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *s
                return;
 
        dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
-       kmem_cache_free(device->sed_pool, sed);
-}
-
-static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
-                           struct ioat_ring_ent *desc, int idx)
-{
-       struct ioat_chan_common *chan = &ioat->base;
-       struct pci_dev *pdev = chan->device->pdev;
-       size_t len = desc->len;
-       size_t offset = len - desc->hw->size;
-       struct dma_async_tx_descriptor *tx = &desc->txd;
-       enum dma_ctrl_flags flags = tx->flags;
-
-       switch (desc->hw->ctl_f.op) {
-       case IOAT_OP_COPY:
-               if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
-                       ioat_dma_unmap(chan, flags, len, desc->hw);
-               break;
-       case IOAT_OP_XOR_VAL:
-       case IOAT_OP_XOR: {
-               struct ioat_xor_descriptor *xor = desc->xor;
-               struct ioat_ring_ent *ext;
-               struct ioat_xor_ext_descriptor *xor_ex = NULL;
-               int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
-               struct ioat_raw_descriptor *descs[2];
-               int i;
-
-               if (src_cnt > 5) {
-                       ext = ioat2_get_ring_ent(ioat, idx + 1);
-                       xor_ex = ext->xor_ex;
-               }
-
-               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       descs[0] = (struct ioat_raw_descriptor *) xor;
-                       descs[1] = (struct ioat_raw_descriptor *) xor_ex;
-                       for (i = 0; i < src_cnt; i++) {
-                               dma_addr_t src = xor_get_src(descs, i);
-
-                               ioat_unmap(pdev, src - offset, len,
-                                          PCI_DMA_TODEVICE, flags, 0);
-                       }
-
-                       /* dest is a source in xor validate operations */
-                       if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
-                               ioat_unmap(pdev, xor->dst_addr - offset, len,
-                                          PCI_DMA_TODEVICE, flags, 1);
-                               break;
-                       }
-               }
-
-               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
-                       ioat_unmap(pdev, xor->dst_addr - offset, len,
-                                  PCI_DMA_FROMDEVICE, flags, 1);
-               break;
-       }
-       case IOAT_OP_PQ_VAL:
-       case IOAT_OP_PQ: {
-               struct ioat_pq_descriptor *pq = desc->pq;
-               struct ioat_ring_ent *ext;
-               struct ioat_pq_ext_descriptor *pq_ex = NULL;
-               int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
-               struct ioat_raw_descriptor *descs[2];
-               int i;
-
-               if (src_cnt > 3) {
-                       ext = ioat2_get_ring_ent(ioat, idx + 1);
-                       pq_ex = ext->pq_ex;
-               }
-
-               /* in the 'continue' case don't unmap the dests as sources */
-               if (dmaf_p_disabled_continue(flags))
-                       src_cnt--;
-               else if (dmaf_continue(flags))
-                       src_cnt -= 3;
-
-               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       descs[0] = (struct ioat_raw_descriptor *) pq;
-                       descs[1] = (struct ioat_raw_descriptor *) pq_ex;
-                       for (i = 0; i < src_cnt; i++) {
-                               dma_addr_t src = pq_get_src(descs, i);
-
-                               ioat_unmap(pdev, src - offset, len,
-                                          PCI_DMA_TODEVICE, flags, 0);
-                       }
-
-                       /* the dests are sources in pq validate operations */
-                       if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
-                               if (!(flags & DMA_PREP_PQ_DISABLE_P))
-                                       ioat_unmap(pdev, pq->p_addr - offset,
-                                                  len, PCI_DMA_TODEVICE, flags, 0);
-                               if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-                                       ioat_unmap(pdev, pq->q_addr - offset,
-                                                  len, PCI_DMA_TODEVICE, flags, 0);
-                               break;
-                       }
-               }
-
-               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       if (!(flags & DMA_PREP_PQ_DISABLE_P))
-                               ioat_unmap(pdev, pq->p_addr - offset, len,
-                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
-                       if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-                               ioat_unmap(pdev, pq->q_addr - offset, len,
-                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
-               }
-               break;
-       }
-       case IOAT_OP_PQ_16S:
-       case IOAT_OP_PQ_VAL_16S: {
-               struct ioat_pq_descriptor *pq = desc->pq;
-               int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
-               struct ioat_raw_descriptor *descs[4];
-               int i;
-
-               /* in the 'continue' case don't unmap the dests as sources */
-               if (dmaf_p_disabled_continue(flags))
-                       src_cnt--;
-               else if (dmaf_continue(flags))
-                       src_cnt -= 3;
-
-               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       descs[0] = (struct ioat_raw_descriptor *)pq;
-                       descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw);
-                       descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]);
-                       for (i = 0; i < src_cnt; i++) {
-                               dma_addr_t src = pq16_get_src(descs, i);
-
-                               ioat_unmap(pdev, src - offset, len,
-                                          PCI_DMA_TODEVICE, flags, 0);
-                       }
-
-                       /* the dests are sources in pq validate operations */
-                       if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
-                               if (!(flags & DMA_PREP_PQ_DISABLE_P))
-                                       ioat_unmap(pdev, pq->p_addr - offset,
-                                                  len, PCI_DMA_TODEVICE,
-                                                  flags, 0);
-                               if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-                                       ioat_unmap(pdev, pq->q_addr - offset,
-                                                  len, PCI_DMA_TODEVICE,
-                                                  flags, 0);
-                               break;
-                       }
-               }
-
-               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       if (!(flags & DMA_PREP_PQ_DISABLE_P))
-                               ioat_unmap(pdev, pq->p_addr - offset, len,
-                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
-                       if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-                               ioat_unmap(pdev, pq->q_addr - offset, len,
-                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
-               }
-               break;
-       }
-       default:
-               dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
-                       __func__, desc->hw->ctl_f.op);
-       }
+       kmem_cache_free(ioat3_sed_cache, sed);
 }
 
 static bool desc_has_ext(struct ioat_ring_ent *desc)
@@ -577,7 +401,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
                tx = &desc->txd;
                if (tx->cookie) {
                        dma_cookie_complete(tx);
-                       ioat3_dma_unmap(ioat, desc, idx + i);
+                       dma_descriptor_unmap(tx);
                        if (tx->callback) {
                                tx->callback(tx->callback_param);
                                tx->callback = NULL;
@@ -807,7 +631,7 @@ ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
        enum dma_status ret;
 
        ret = dma_cookie_status(c, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        ioat3_cleanup(ioat);
@@ -1129,9 +953,6 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
        u8 op;
        int i, s, idx, num_descs;
 
-       /* this function only handles src_cnt 9 - 16 */
-       BUG_ON(src_cnt < 9);
-
        /* this function is only called with 9-16 sources */
        op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
 
@@ -1159,8 +980,7 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
 
                descs[0] = (struct ioat_raw_descriptor *) pq;
 
-               desc->sed = ioat3_alloc_sed(device,
-                                           sed_get_pq16_pool_idx(src_cnt));
+               desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3);
                if (!desc->sed) {
                        dev_err(to_dev(chan),
                                "%s: no free sed entries\n", __func__);
@@ -1218,13 +1038,21 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
        return &desc->txd;
 }
 
+static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
+{
+       if (dmaf_p_disabled_continue(flags))
+               return src_cnt + 1;
+       else if (dmaf_continue(flags))
+               return src_cnt + 3;
+       else
+               return src_cnt;
+}
+
 static struct dma_async_tx_descriptor *
 ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
              unsigned int src_cnt, const unsigned char *scf, size_t len,
              unsigned long flags)
 {
-       struct dma_device *dma = chan->device;
-
        /* specify valid address for disabled result */
        if (flags & DMA_PREP_PQ_DISABLE_P)
                dst[0] = dst[1];
@@ -1244,7 +1072,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
                single_source_coef[0] = scf[0];
                single_source_coef[1] = 0;
 
-               return (src_cnt > 8) && (dma->max_pq > 8) ?
+               return src_cnt_flags(src_cnt, flags) > 8 ?
                        __ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
                                               2, single_source_coef, len,
                                               flags) :
@@ -1252,7 +1080,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
                                             single_source_coef, len, flags);
 
        } else {
-               return (src_cnt > 8) && (dma->max_pq > 8) ?
+               return src_cnt_flags(src_cnt, flags) > 8 ?
                        __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
                                               scf, len, flags) :
                        __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
@@ -1265,8 +1093,6 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
                  unsigned int src_cnt, const unsigned char *scf, size_t len,
                  enum sum_check_flags *pqres, unsigned long flags)
 {
-       struct dma_device *dma = chan->device;
-
        /* specify valid address for disabled result */
        if (flags & DMA_PREP_PQ_DISABLE_P)
                pq[0] = pq[1];
@@ -1278,7 +1104,7 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
         */
        *pqres = 0;
 
-       return (src_cnt > 8) && (dma->max_pq > 8) ?
+       return src_cnt_flags(src_cnt, flags) > 8 ?
                __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
                                       flags) :
                __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
@@ -1289,7 +1115,6 @@ static struct dma_async_tx_descriptor *
 ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
                 unsigned int src_cnt, size_t len, unsigned long flags)
 {
-       struct dma_device *dma = chan->device;
        unsigned char scf[src_cnt];
        dma_addr_t pq[2];
 
@@ -1298,7 +1123,7 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
        flags |= DMA_PREP_PQ_DISABLE_Q;
        pq[1] = dst; /* specify valid address for disabled result */
 
-       return (src_cnt > 8) && (dma->max_pq > 8) ?
+       return src_cnt_flags(src_cnt, flags) > 8 ?
                __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
                                       flags) :
                __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
@@ -1310,7 +1135,6 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
                     unsigned int src_cnt, size_t len,
                     enum sum_check_flags *result, unsigned long flags)
 {
-       struct dma_device *dma = chan->device;
        unsigned char scf[src_cnt];
        dma_addr_t pq[2];
 
@@ -1324,8 +1148,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
        flags |= DMA_PREP_PQ_DISABLE_Q;
        pq[1] = pq[0]; /* specify valid address for disabled result */
 
-
-       return (src_cnt > 8) && (dma->max_pq > 8) ?
+       return src_cnt_flags(src_cnt, flags) > 8 ?
                __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
                                       scf, len, flags) :
                __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
@@ -1444,9 +1267,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
                                           DMA_TO_DEVICE);
        tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
                                      IOAT_NUM_SRC_TEST, PAGE_SIZE,
-                                     DMA_PREP_INTERRUPT |
-                                     DMA_COMPL_SKIP_SRC_UNMAP |
-                                     DMA_COMPL_SKIP_DEST_UNMAP);
+                                     DMA_PREP_INTERRUPT);
 
        if (!tx) {
                dev_err(dev, "Self-test xor prep failed\n");
@@ -1468,7 +1289,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 
        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 
-       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                dev_err(dev, "Self-test xor timed out\n");
                err = -ENODEV;
                goto dma_unmap;
@@ -1507,9 +1328,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
                                           DMA_TO_DEVICE);
        tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
                                          IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-                                         &xor_val_result, DMA_PREP_INTERRUPT |
-                                         DMA_COMPL_SKIP_SRC_UNMAP |
-                                         DMA_COMPL_SKIP_DEST_UNMAP);
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
        if (!tx) {
                dev_err(dev, "Self-test zero prep failed\n");
                err = -ENODEV;
@@ -1530,7 +1349,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 
        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 
-       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                dev_err(dev, "Self-test validate timed out\n");
                err = -ENODEV;
                goto dma_unmap;
@@ -1545,6 +1364,8 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
                goto free_resources;
        }
 
+       memset(page_address(dest), 0, PAGE_SIZE);
+
        /* test for non-zero parity sum */
        op = IOAT_OP_XOR_VAL;
 
@@ -1554,9 +1375,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
                                           DMA_TO_DEVICE);
        tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
                                          IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-                                         &xor_val_result, DMA_PREP_INTERRUPT |
-                                         DMA_COMPL_SKIP_SRC_UNMAP |
-                                         DMA_COMPL_SKIP_DEST_UNMAP);
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
        if (!tx) {
                dev_err(dev, "Self-test 2nd zero prep failed\n");
                err = -ENODEV;
@@ -1577,7 +1396,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 
        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 
-       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                dev_err(dev, "Self-test 2nd validate timed out\n");
                err = -ENODEV;
                goto dma_unmap;
@@ -1630,52 +1449,36 @@ static int ioat3_dma_self_test(struct ioatdma_device *device)
 
 static int ioat3_irq_reinit(struct ioatdma_device *device)
 {
-       int msixcnt = device->common.chancnt;
        struct pci_dev *pdev = device->pdev;
-       int i;
-       struct msix_entry *msix;
-       struct ioat_chan_common *chan;
-       int err = 0;
+       int irq = pdev->irq, i;
+
+       if (!is_bwd_ioat(pdev))
+               return 0;
 
        switch (device->irq_mode) {
        case IOAT_MSIX:
+               for (i = 0; i < device->common.chancnt; i++) {
+                       struct msix_entry *msix = &device->msix_entries[i];
+                       struct ioat_chan_common *chan;
 
-               for (i = 0; i < msixcnt; i++) {
-                       msix = &device->msix_entries[i];
                        chan = ioat_chan_by_index(device, i);
                        devm_free_irq(&pdev->dev, msix->vector, chan);
                }
 
                pci_disable_msix(pdev);
                break;
-
-       case IOAT_MSIX_SINGLE:
-               msix = &device->msix_entries[0];
-               chan = ioat_chan_by_index(device, 0);
-               devm_free_irq(&pdev->dev, msix->vector, chan);
-               pci_disable_msix(pdev);
-               break;
-
        case IOAT_MSI:
-               chan = ioat_chan_by_index(device, 0);
-               devm_free_irq(&pdev->dev, pdev->irq, chan);
                pci_disable_msi(pdev);
-               break;
-
+               /* fall through */
        case IOAT_INTX:
-               chan = ioat_chan_by_index(device, 0);
-               devm_free_irq(&pdev->dev, pdev->irq, chan);
+               devm_free_irq(&pdev->dev, irq, device);
                break;
-
        default:
                return 0;
        }
-
        device->irq_mode = IOAT_NOIRQ;
 
-       err = ioat_dma_setup_interrupts(device);
-
-       return err;
+       return ioat_dma_setup_interrupts(device);
 }
 
 static int ioat3_reset_hw(struct ioat_chan_common *chan)
@@ -1718,14 +1521,12 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan)
        }
 
        err = ioat2_reset_sync(chan, msecs_to_jiffies(200));
-       if (err) {
-               dev_err(&pdev->dev, "Failed to reset!\n");
-               return err;
-       }
-
-       if (device->irq_mode != IOAT_NOIRQ && is_bwd_ioat(pdev))
+       if (!err)
                err = ioat3_irq_reinit(device);
 
+       if (err)
+               dev_err(&pdev->dev, "Failed to reset: %d\n", err);
+
        return err;
 }
 
@@ -1835,21 +1636,15 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
                char pool_name[14];
                int i;
 
-               /* allocate sw descriptor pool for SED */
-               device->sed_pool = kmem_cache_create("ioat_sed",
-                               sizeof(struct ioat_sed_ent), 0, 0, NULL);
-               if (!device->sed_pool)
-                       return -ENOMEM;
-
                for (i = 0; i < MAX_SED_POOLS; i++) {
                        snprintf(pool_name, 14, "ioat_hw%d_sed", i);
 
                        /* allocate SED DMA pool */
-                       device->sed_hw_pool[i] = dma_pool_create(pool_name,
+                       device->sed_hw_pool[i] = dmam_pool_create(pool_name,
                                        &pdev->dev,
                                        SED_SIZE * (i + 1), 64, 0);
                        if (!device->sed_hw_pool[i])
-                               goto sed_pool_cleanup;
+                               return -ENOMEM;
 
                }
        }
@@ -1875,28 +1670,4 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
                device->dca = ioat3_dca_init(pdev, device->reg_base);
 
        return 0;
-
-sed_pool_cleanup:
-       if (device->sed_pool) {
-               int i;
-               kmem_cache_destroy(device->sed_pool);
-
-               for (i = 0; i < MAX_SED_POOLS; i++)
-                       if (device->sed_hw_pool[i])
-                               dma_pool_destroy(device->sed_hw_pool[i]);
-       }
-
-       return -ENOMEM;
-}
-
-void ioat3_dma_remove(struct ioatdma_device *device)
-{
-       if (device->sed_pool) {
-               int i;
-               kmem_cache_destroy(device->sed_pool);
-
-               for (i = 0; i < MAX_SED_POOLS; i++)
-                       if (device->sed_hw_pool[i])
-                               dma_pool_destroy(device->sed_hw_pool[i]);
-       }
 }
index 2c8d560e6334123097627ab59ac166a47cb1f0d6..1d051cd045dbc43b5d4b53d42008660f62765ddd 100644 (file)
@@ -123,6 +123,7 @@ module_param(ioat_dca_enabled, int, 0644);
 MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
 
 struct kmem_cache *ioat2_cache;
+struct kmem_cache *ioat3_sed_cache;
 
 #define DRV_NAME "ioatdma"
 
@@ -207,9 +208,6 @@ static void ioat_remove(struct pci_dev *pdev)
        if (!device)
                return;
 
-       if (device->version >= IOAT_VER_3_0)
-               ioat3_dma_remove(device);
-
        dev_err(&pdev->dev, "Removing dma and dca services\n");
        if (device->dca) {
                unregister_dca_provider(device->dca, &pdev->dev);
@@ -221,7 +219,7 @@ static void ioat_remove(struct pci_dev *pdev)
 
 static int __init ioat_init_module(void)
 {
-       int err;
+       int err = -ENOMEM;
 
        pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
                DRV_NAME, IOAT_DMA_VERSION);
@@ -231,9 +229,21 @@ static int __init ioat_init_module(void)
        if (!ioat2_cache)
                return -ENOMEM;
 
+       ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0);
+       if (!ioat3_sed_cache)
+               goto err_ioat2_cache;
+
        err = pci_register_driver(&ioat_pci_driver);
        if (err)
-               kmem_cache_destroy(ioat2_cache);
+               goto err_ioat3_cache;
+
+       return 0;
+
+ err_ioat3_cache:
+       kmem_cache_destroy(ioat3_sed_cache);
+
+ err_ioat2_cache:
+       kmem_cache_destroy(ioat2_cache);
 
        return err;
 }
index dd8b44a56e5d0f7090b8dd65bce87a73a60c90ef..c56137bc3868da7cdc43bf38fab53a30ca410430 100644 (file)
@@ -61,80 +61,6 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
        }
 }
 
-static void
-iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
-       struct dma_async_tx_descriptor *tx = &desc->async_tx;
-       struct iop_adma_desc_slot *unmap = desc->group_head;
-       struct device *dev = &iop_chan->device->pdev->dev;
-       u32 len = unmap->unmap_len;
-       enum dma_ctrl_flags flags = tx->flags;
-       u32 src_cnt;
-       dma_addr_t addr;
-       dma_addr_t dest;
-
-       src_cnt = unmap->unmap_src_cnt;
-       dest = iop_desc_get_dest_addr(unmap, iop_chan);
-       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               enum dma_data_direction dir;
-
-               if (src_cnt > 1) /* is xor? */
-                       dir = DMA_BIDIRECTIONAL;
-               else
-                       dir = DMA_FROM_DEVICE;
-
-               dma_unmap_page(dev, dest, len, dir);
-       }
-
-       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               while (src_cnt--) {
-                       addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
-                       if (addr == dest)
-                               continue;
-                       dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
-               }
-       }
-       desc->group_head = NULL;
-}
-
-static void
-iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
-       struct dma_async_tx_descriptor *tx = &desc->async_tx;
-       struct iop_adma_desc_slot *unmap = desc->group_head;
-       struct device *dev = &iop_chan->device->pdev->dev;
-       u32 len = unmap->unmap_len;
-       enum dma_ctrl_flags flags = tx->flags;
-       u32 src_cnt = unmap->unmap_src_cnt;
-       dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
-       dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
-       int i;
-
-       if (tx->flags & DMA_PREP_CONTINUE)
-               src_cnt -= 3;
-
-       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
-               dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
-               dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
-       }
-
-       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               dma_addr_t addr;
-
-               for (i = 0; i < src_cnt; i++) {
-                       addr = iop_desc_get_src_addr(unmap, iop_chan, i);
-                       dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
-               }
-               if (desc->pq_check_result) {
-                       dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
-                       dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
-               }
-       }
-
-       desc->group_head = NULL;
-}
-
-
 static dma_cookie_t
 iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
        struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
@@ -152,15 +78,9 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
                if (tx->callback)
                        tx->callback(tx->callback_param);
 
-               /* unmap dma addresses
-                * (unmap_single vs unmap_page?)
-                */
-               if (desc->group_head && desc->unmap_len) {
-                       if (iop_desc_is_pq(desc))
-                               iop_desc_unmap_pq(iop_chan, desc);
-                       else
-                               iop_desc_unmap(iop_chan, desc);
-               }
+               dma_descriptor_unmap(tx);
+               if (desc->group_head)
+                       desc->group_head = NULL;
        }
 
        /* run dependent operations */
@@ -591,7 +511,6 @@ iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
        if (sw_desc) {
                grp_start = sw_desc->group_head;
                iop_desc_init_interrupt(grp_start, iop_chan);
-               grp_start->unmap_len = 0;
                sw_desc->async_tx.flags = flags;
        }
        spin_unlock_bh(&iop_chan->lock);
@@ -623,8 +542,6 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
                iop_desc_set_byte_count(grp_start, iop_chan, len);
                iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
                iop_desc_set_memcpy_src_addr(grp_start, dma_src);
-               sw_desc->unmap_src_cnt = 1;
-               sw_desc->unmap_len = len;
                sw_desc->async_tx.flags = flags;
        }
        spin_unlock_bh(&iop_chan->lock);
@@ -657,8 +574,6 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
                iop_desc_init_xor(grp_start, src_cnt, flags);
                iop_desc_set_byte_count(grp_start, iop_chan, len);
                iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
-               sw_desc->unmap_src_cnt = src_cnt;
-               sw_desc->unmap_len = len;
                sw_desc->async_tx.flags = flags;
                while (src_cnt--)
                        iop_desc_set_xor_src_addr(grp_start, src_cnt,
@@ -694,8 +609,6 @@ iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
                grp_start->xor_check_result = result;
                pr_debug("\t%s: grp_start->xor_check_result: %p\n",
                        __func__, grp_start->xor_check_result);
-               sw_desc->unmap_src_cnt = src_cnt;
-               sw_desc->unmap_len = len;
                sw_desc->async_tx.flags = flags;
                while (src_cnt--)
                        iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
@@ -748,8 +661,6 @@ iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
                        dst[0] = dst[1] & 0x7;
 
                iop_desc_set_pq_addr(g, dst);
-               sw_desc->unmap_src_cnt = src_cnt;
-               sw_desc->unmap_len = len;
                sw_desc->async_tx.flags = flags;
                for (i = 0; i < src_cnt; i++)
                        iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
@@ -804,8 +715,6 @@ iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
                g->pq_check_result = pqres;
                pr_debug("\t%s: g->pq_check_result: %p\n",
                        __func__, g->pq_check_result);
-               sw_desc->unmap_src_cnt = src_cnt+2;
-               sw_desc->unmap_len = len;
                sw_desc->async_tx.flags = flags;
                while (src_cnt--)
                        iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
@@ -864,7 +773,7 @@ static enum dma_status iop_adma_status(struct dma_chan *chan,
        int ret;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        iop_adma_slot_cleanup(iop_chan);
@@ -983,7 +892,7 @@ static int iop_adma_memcpy_self_test(struct iop_adma_device *device)
        msleep(1);
 
        if (iop_adma_status(dma_chan, cookie, NULL) !=
-                       DMA_SUCCESS) {
+                       DMA_COMPLETE) {
                dev_err(dma_chan->device->dev,
                        "Self-test copy timed out, disabling\n");
                err = -ENODEV;
@@ -1083,7 +992,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device)
        msleep(8);
 
        if (iop_adma_status(dma_chan, cookie, NULL) !=
-               DMA_SUCCESS) {
+               DMA_COMPLETE) {
                dev_err(dma_chan->device->dev,
                        "Self-test xor timed out, disabling\n");
                err = -ENODEV;
@@ -1129,7 +1038,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device)
        iop_adma_issue_pending(dma_chan);
        msleep(8);
 
-       if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                dev_err(dma_chan->device->dev,
                        "Self-test zero sum timed out, disabling\n");
                err = -ENODEV;
@@ -1158,7 +1067,7 @@ iop_adma_xor_val_self_test(struct iop_adma_device *device)
        iop_adma_issue_pending(dma_chan);
        msleep(8);
 
-       if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+       if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
                dev_err(dma_chan->device->dev,
                        "Self-test non-zero sum timed out, disabling\n");
                err = -ENODEV;
@@ -1254,7 +1163,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
        msleep(8);
 
        if (iop_adma_status(dma_chan, cookie, NULL) !=
-               DMA_SUCCESS) {
+               DMA_COMPLETE) {
                dev_err(dev, "Self-test pq timed out, disabling\n");
                err = -ENODEV;
                goto free_resources;
@@ -1291,7 +1200,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
        msleep(8);
 
        if (iop_adma_status(dma_chan, cookie, NULL) !=
-               DMA_SUCCESS) {
+               DMA_COMPLETE) {
                dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
                err = -ENODEV;
                goto free_resources;
@@ -1323,7 +1232,7 @@ iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
        msleep(8);
 
        if (iop_adma_status(dma_chan, cookie, NULL) !=
-               DMA_SUCCESS) {
+               DMA_COMPLETE) {
                dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
                err = -ENODEV;
                goto free_resources;
index cb9c0bc317e89ed6acebba276ed1be2788cc901e..128ca143486d1b59c0106cb8a177b5a2e1b803fd 100644 (file)
@@ -1232,8 +1232,10 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
        desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
        descnew = desc;
 
-       dev_dbg(dev, "IDMAC irq %d, dma 0x%08x, next dma 0x%08x, current %d, curbuf 0x%08x\n",
-               irq, sg_dma_address(*sg), sgnext ? sg_dma_address(sgnext) : 0, ichan->active_buffer, curbuf);
+       dev_dbg(dev, "IDMAC irq %d, dma %#llx, next dma %#llx, current %d, curbuf %#x\n",
+               irq, (u64)sg_dma_address(*sg),
+               sgnext ? (u64)sg_dma_address(sgnext) : 0,
+               ichan->active_buffer, curbuf);
 
        /* Find the descriptor of sgnext */
        sgnew = idmac_sg_next(ichan, &descnew, *sg);
index a2c330f5f9521302ed04998dfb38eecb60b2f894..e26075408e9b95a365dfd188cad786593604412f 100644 (file)
@@ -344,7 +344,7 @@ static enum dma_status k3_dma_tx_status(struct dma_chan *chan,
        size_t bytes = 0;
 
        ret = dma_cookie_status(&c->vc.chan, cookie, state);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        spin_lock_irqsave(&c->vc.lock, flags);
@@ -693,7 +693,7 @@ static int k3_dma_probe(struct platform_device *op)
 
        irq = platform_get_irq(op, 0);
        ret = devm_request_irq(&op->dev, irq,
-                       k3_dma_int_handler, IRQF_DISABLED, DRIVER_NAME, d);
+                       k3_dma_int_handler, 0, DRIVER_NAME, d);
        if (ret)
                return ret;
 
index ff8d7827f8cbe80e2c66d78db1f50ad6fdd91b5a..dcb1e05149a7664c6e65a214d783080d540aaafd 100644 (file)
@@ -798,8 +798,7 @@ static void dma_do_tasklet(unsigned long data)
                 * move the descriptors to a temporary list so we can drop
                 * the lock during the entire cleanup operation
                 */
-               list_del(&desc->node);
-               list_add(&desc->node, &chain_cleanup);
+               list_move(&desc->node, &chain_cleanup);
 
                /*
                 * Look for the first list entry which has the ENDIRQEN flag
@@ -863,7 +862,7 @@ static int mmp_pdma_chan_init(struct mmp_pdma_device *pdev,
 
        if (irq) {
                ret = devm_request_irq(pdev->dev, irq,
-                       mmp_pdma_chan_handler, IRQF_DISABLED, "pdma", phy);
+                       mmp_pdma_chan_handler, 0, "pdma", phy);
                if (ret) {
                        dev_err(pdev->dev, "channel request irq fail!\n");
                        return ret;
@@ -970,7 +969,7 @@ static int mmp_pdma_probe(struct platform_device *op)
                /* all chan share one irq, demux inside */
                irq = platform_get_irq(op, 0);
                ret = devm_request_irq(pdev->dev, irq,
-                       mmp_pdma_int_handler, IRQF_DISABLED, "pdma", pdev);
+                       mmp_pdma_int_handler, 0, "pdma", pdev);
                if (ret)
                        return ret;
        }
index d3b6358e5a27037281f6da2efb6dad3e926b9fb1..3ddacc14a7366611ffb089d21c70fb4ba3c896c5 100644 (file)
 #define TDCR_BURSTSZ_16B       (0x3 << 6)
 #define TDCR_BURSTSZ_32B       (0x6 << 6)
 #define TDCR_BURSTSZ_64B       (0x7 << 6)
+#define TDCR_BURSTSZ_SQU_1B            (0x5 << 6)
+#define TDCR_BURSTSZ_SQU_2B            (0x6 << 6)
+#define TDCR_BURSTSZ_SQU_4B            (0x0 << 6)
+#define TDCR_BURSTSZ_SQU_8B            (0x1 << 6)
+#define TDCR_BURSTSZ_SQU_16B   (0x3 << 6)
 #define TDCR_BURSTSZ_SQU_32B   (0x7 << 6)
 #define TDCR_BURSTSZ_128B      (0x5 << 6)
 #define TDCR_DSTDIR_MSK                (0x3 << 4)      /* Dst Direction */
@@ -158,7 +163,7 @@ static void mmp_tdma_disable_chan(struct mmp_tdma_chan *tdmac)
        /* disable irq */
        writel(0, tdmac->reg_base + TDIMR);
 
-       tdmac->status = DMA_SUCCESS;
+       tdmac->status = DMA_COMPLETE;
 }
 
 static void mmp_tdma_resume_chan(struct mmp_tdma_chan *tdmac)
@@ -228,8 +233,31 @@ static int mmp_tdma_config_chan(struct mmp_tdma_chan *tdmac)
                        return -EINVAL;
                }
        } else if (tdmac->type == PXA910_SQU) {
-               tdcr |= TDCR_BURSTSZ_SQU_32B;
                tdcr |= TDCR_SSPMOD;
+
+               switch (tdmac->burst_sz) {
+               case 1:
+                       tdcr |= TDCR_BURSTSZ_SQU_1B;
+                       break;
+               case 2:
+                       tdcr |= TDCR_BURSTSZ_SQU_2B;
+                       break;
+               case 4:
+                       tdcr |= TDCR_BURSTSZ_SQU_4B;
+                       break;
+               case 8:
+                       tdcr |= TDCR_BURSTSZ_SQU_8B;
+                       break;
+               case 16:
+                       tdcr |= TDCR_BURSTSZ_SQU_16B;
+                       break;
+               case 32:
+                       tdcr |= TDCR_BURSTSZ_SQU_32B;
+                       break;
+               default:
+                       dev_err(tdmac->dev, "mmp_tdma: unknown burst size.\n");
+                       return -EINVAL;
+               }
        }
 
        writel(tdcr, tdmac->reg_base + TDCR);
@@ -324,7 +352,7 @@ static int mmp_tdma_alloc_chan_resources(struct dma_chan *chan)
 
        if (tdmac->irq) {
                ret = devm_request_irq(tdmac->dev, tdmac->irq,
-                       mmp_tdma_chan_handler, IRQF_DISABLED, "tdma", tdmac);
+                       mmp_tdma_chan_handler, 0, "tdma", tdmac);
                if (ret)
                        return ret;
        }
@@ -365,7 +393,7 @@ static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic(
        int num_periods = buf_len / period_len;
        int i = 0, buf = 0;
 
-       if (tdmac->status != DMA_SUCCESS)
+       if (tdmac->status != DMA_COMPLETE)
                return NULL;
 
        if (period_len > TDMA_MAX_XFER_BYTES) {
@@ -499,7 +527,7 @@ static int mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
        tdmac->idx         = idx;
        tdmac->type        = type;
        tdmac->reg_base    = (unsigned long)tdev->base + idx * 4;
-       tdmac->status = DMA_SUCCESS;
+       tdmac->status = DMA_COMPLETE;
        tdev->tdmac[tdmac->idx] = tdmac;
        tasklet_init(&tdmac->tasklet, dma_do_tasklet, (unsigned long)tdmac);
 
@@ -554,7 +582,7 @@ static int mmp_tdma_probe(struct platform_device *pdev)
        if (irq_num != chan_num) {
                irq = platform_get_irq(pdev, 0);
                ret = devm_request_irq(&pdev->dev, irq,
-                       mmp_tdma_int_handler, IRQF_DISABLED, "tdma", tdev);
+                       mmp_tdma_int_handler, 0, "tdma", tdev);
                if (ret)
                        return ret;
        }
index 536dcb8ba5fdfe69ed5f726fc6b5897f00266698..7807f0ef4e209c25ad90db9d32f7bc13955391cf 100644 (file)
@@ -60,14 +60,6 @@ static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
        return hw_desc->phy_dest_addr;
 }
 
-static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
-                               int src_idx)
-{
-       struct mv_xor_desc *hw_desc = desc->hw_desc;
-       return hw_desc->phy_src_addr[mv_phy_src_idx(src_idx)];
-}
-
-
 static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
                                   u32 byte_count)
 {
@@ -278,42 +270,9 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
                        desc->async_tx.callback(
                                desc->async_tx.callback_param);
 
-               /* unmap dma addresses
-                * (unmap_single vs unmap_page?)
-                */
-               if (desc->group_head && desc->unmap_len) {
-                       struct mv_xor_desc_slot *unmap = desc->group_head;
-                       struct device *dev = mv_chan_to_devp(mv_chan);
-                       u32 len = unmap->unmap_len;
-                       enum dma_ctrl_flags flags = desc->async_tx.flags;
-                       u32 src_cnt;
-                       dma_addr_t addr;
-                       dma_addr_t dest;
-
-                       src_cnt = unmap->unmap_src_cnt;
-                       dest = mv_desc_get_dest_addr(unmap);
-                       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                               enum dma_data_direction dir;
-
-                               if (src_cnt > 1) /* is xor ? */
-                                       dir = DMA_BIDIRECTIONAL;
-                               else
-                                       dir = DMA_FROM_DEVICE;
-                               dma_unmap_page(dev, dest, len, dir);
-                       }
-
-                       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                               while (src_cnt--) {
-                                       addr = mv_desc_get_src_addr(unmap,
-                                                                   src_cnt);
-                                       if (addr == dest)
-                                               continue;
-                                       dma_unmap_page(dev, addr, len,
-                                                      DMA_TO_DEVICE);
-                               }
-                       }
+               dma_descriptor_unmap(&desc->async_tx);
+               if (desc->group_head)
                        desc->group_head = NULL;
-               }
        }
 
        /* run dependent operations */
@@ -749,7 +708,7 @@ static enum dma_status mv_xor_status(struct dma_chan *chan,
        enum dma_status ret;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS) {
+       if (ret == DMA_COMPLETE) {
                mv_xor_clean_completed_slots(mv_chan);
                return ret;
        }
@@ -874,7 +833,7 @@ static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan)
        msleep(1);
 
        if (mv_xor_status(dma_chan, cookie, NULL) !=
-           DMA_SUCCESS) {
+           DMA_COMPLETE) {
                dev_err(dma_chan->device->dev,
                        "Self-test copy timed out, disabling\n");
                err = -ENODEV;
@@ -968,7 +927,7 @@ mv_xor_xor_self_test(struct mv_xor_chan *mv_chan)
        msleep(8);
 
        if (mv_xor_status(dma_chan, cookie, NULL) !=
-           DMA_SUCCESS) {
+           DMA_COMPLETE) {
                dev_err(dma_chan->device->dev,
                        "Self-test xor timed out, disabling\n");
                err = -ENODEV;
@@ -1076,10 +1035,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
        }
 
        mv_chan->mmr_base = xordev->xor_base;
-       if (!mv_chan->mmr_base) {
-               ret = -ENOMEM;
-               goto err_free_dma;
-       }
+       mv_chan->mmr_high_base = xordev->xor_high_base;
        tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
                     mv_chan);
 
@@ -1138,7 +1094,7 @@ static void
 mv_xor_conf_mbus_windows(struct mv_xor_device *xordev,
                         const struct mbus_dram_target_info *dram)
 {
-       void __iomem *base = xordev->xor_base;
+       void __iomem *base = xordev->xor_high_base;
        u32 win_enable = 0;
        int i;
 
index 06b067f24c9b33f7592d65a4ece98bf99c3cb776..d0749229c875187a454c498964155c37e765d314 100644 (file)
 #define XOR_OPERATION_MODE_MEMCPY      2
 #define XOR_DESCRIPTOR_SWAP            BIT(14)
 
-#define XOR_CURR_DESC(chan)    (chan->mmr_base + 0x210 + (chan->idx * 4))
-#define XOR_NEXT_DESC(chan)    (chan->mmr_base + 0x200 + (chan->idx * 4))
-#define XOR_BYTE_COUNT(chan)   (chan->mmr_base + 0x220 + (chan->idx * 4))
-#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
-#define XOR_BLOCK_SIZE(chan)   (chan->mmr_base + 0x2C0 + (chan->idx * 4))
-#define XOR_INIT_VALUE_LOW(chan)       (chan->mmr_base + 0x2E0)
-#define XOR_INIT_VALUE_HIGH(chan)      (chan->mmr_base + 0x2E4)
+#define XOR_CURR_DESC(chan)    (chan->mmr_high_base + 0x10 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan)    (chan->mmr_high_base + 0x00 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan)   (chan->mmr_high_base + 0x20 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_high_base + 0xB0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan)   (chan->mmr_high_base + 0xC0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan)       (chan->mmr_high_base + 0xE0)
+#define XOR_INIT_VALUE_HIGH(chan)      (chan->mmr_high_base + 0xE4)
 
 #define XOR_CONFIG(chan)       (chan->mmr_base + 0x10 + (chan->idx * 4))
 #define XOR_ACTIVATION(chan)   (chan->mmr_base + 0x20 + (chan->idx * 4))
 #define XOR_ERROR_ADDR(chan)   (chan->mmr_base + 0x60)
 #define XOR_INTR_MASK_VALUE    0x3F5
 
-#define WINDOW_BASE(w)         (0x250 + ((w) << 2))
-#define WINDOW_SIZE(w)         (0x270 + ((w) << 2))
-#define WINDOW_REMAP_HIGH(w)   (0x290 + ((w) << 2))
-#define WINDOW_BAR_ENABLE(chan)        (0x240 + ((chan) << 2))
-#define WINDOW_OVERRIDE_CTRL(chan)     (0x2A0 + ((chan) << 2))
+#define WINDOW_BASE(w)         (0x50 + ((w) << 2))
+#define WINDOW_SIZE(w)         (0x70 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w)   (0x90 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan)        (0x40 + ((chan) << 2))
+#define WINDOW_OVERRIDE_CTRL(chan)     (0xA0 + ((chan) << 2))
 
 struct mv_xor_device {
        void __iomem         *xor_base;
@@ -82,6 +82,7 @@ struct mv_xor_chan {
        int                     pending;
        spinlock_t              lock; /* protects the descriptor slot pool */
        void __iomem            *mmr_base;
+       void __iomem            *mmr_high_base;
        unsigned int            idx;
        int                     irq;
        enum dma_transaction_type       current_type;
index ccd13df841db790ff9eabc9cbc9df79f5f8bb9af..ead491346da70183a3a235687de975f85f89e677 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
+#include <linux/list.h>
 
 #include <asm/irq.h>
 
@@ -57,6 +58,9 @@
        (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x050 : 0x110) + (n) * 0x70)
 #define HW_APBHX_CHn_SEMA(d, n) \
        (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x080 : 0x140) + (n) * 0x70)
+#define HW_APBHX_CHn_BAR(d, n) \
+       (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x070 : 0x130) + (n) * 0x70)
+#define HW_APBX_CHn_DEBUG1(d, n) (0x150 + (n) * 0x70)
 
 /*
  * ccw bits definitions
@@ -115,7 +119,9 @@ struct mxs_dma_chan {
        int                             desc_count;
        enum dma_status                 status;
        unsigned int                    flags;
+       bool                            reset;
 #define MXS_DMA_SG_LOOP                        (1 << 0)
+#define MXS_DMA_USE_SEMAPHORE          (1 << 1)
 };
 
 #define MXS_DMA_CHANNELS               16
@@ -201,12 +207,47 @@ static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
        struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
        int chan_id = mxs_chan->chan.chan_id;
 
-       if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma))
+       /*
+        * mxs dma channel resets can cause a channel stall. To recover from a
+        * channel stall, we have to reset the whole DMA engine. To avoid this,
+        * we use cyclic DMA with semaphores, which are incremented in
+        * mxs_dma_int_handler. To reset the channel, we can simply stop writing
+        * into the semaphore counter.
+        */
+       if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+                       mxs_chan->flags & MXS_DMA_SG_LOOP) {
+               mxs_chan->reset = true;
+       } else if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma)) {
                writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL),
                        mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
-       else
+       } else {
+               unsigned long elapsed = 0;
+               const unsigned long max_wait = 50000; /* 50ms */
+               void __iomem *reg_dbg1 = mxs_dma->base +
+                               HW_APBX_CHn_DEBUG1(mxs_dma, chan_id);
+
+               /*
+                * On i.MX28 APBX, the DMA channel can stop working if we reset
+                * the channel while it is in READ_FLUSH (0x08) state.
+                * We wait here until we leave the state. Then we trigger the
+                * reset. Waiting a maximum of 50ms, the kernel shouldn't crash
+                * because of this.
+                */
+               while ((readl(reg_dbg1) & 0xf) == 0x8 && elapsed < max_wait) {
+                       udelay(100);
+                       elapsed += 100;
+               }
+
+               if (elapsed >= max_wait)
+                       dev_err(&mxs_chan->mxs_dma->pdev->dev,
+                                       "Failed waiting for the DMA channel %d to leave state READ_FLUSH, trying to reset channel in READ_FLUSH state now\n",
+                                       chan_id);
+
                writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL),
                        mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET);
+       }
+
+       mxs_chan->status = DMA_COMPLETE;
 }
 
 static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
@@ -219,12 +260,21 @@ static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
                mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(mxs_dma, chan_id));
 
        /* write 1 to SEMA to kick off the channel */
-       writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+       if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+                       mxs_chan->flags & MXS_DMA_SG_LOOP) {
+               /* A cyclic DMA consists of at least 2 segments, so initialize
+                * the semaphore with 2 so we have enough time to add 1 to the
+                * semaphore if we need to */
+               writel(2, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+       } else {
+               writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+       }
+       mxs_chan->reset = false;
 }
 
 static void mxs_dma_disable_chan(struct mxs_dma_chan *mxs_chan)
 {
-       mxs_chan->status = DMA_SUCCESS;
+       mxs_chan->status = DMA_COMPLETE;
 }
 
 static void mxs_dma_pause_chan(struct mxs_dma_chan *mxs_chan)
@@ -272,58 +322,88 @@ static void mxs_dma_tasklet(unsigned long data)
                mxs_chan->desc.callback(mxs_chan->desc.callback_param);
 }
 
+/*
+ * Map an interrupt number to the index of the channel that uses it.
+ *
+ * Scans mxs_dma->mxs_chans[0 .. nr_channels - 1] and returns the index of
+ * the first entry whose chan_irq equals irq, or -EINVAL if no channel
+ * matches.
+ */
+static int mxs_dma_irq_to_chan(struct mxs_dma_engine *mxs_dma, int irq)
+{
+       int i;
+
+       for (i = 0; i != mxs_dma->nr_channels; ++i)
+               if (mxs_dma->mxs_chans[i].chan_irq == irq)
+                       return i;
+
+       return -EINVAL;
+}
+
 static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
 {
        struct mxs_dma_engine *mxs_dma = dev_id;
-       u32 stat1, stat2;
+       struct mxs_dma_chan *mxs_chan;
+       u32 completed;
+       u32 err;
+       int chan = mxs_dma_irq_to_chan(mxs_dma, irq);
+
+       if (chan < 0)
+               return IRQ_NONE;
 
        /* completion status */
-       stat1 = readl(mxs_dma->base + HW_APBHX_CTRL1);
-       stat1 &= MXS_DMA_CHANNELS_MASK;
-       writel(stat1, mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
+       completed = readl(mxs_dma->base + HW_APBHX_CTRL1);
+       completed = (completed >> chan) & 0x1;
+
+       /* Clear interrupt */
+       writel((1 << chan),
+                       mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
 
        /* error status */
-       stat2 = readl(mxs_dma->base + HW_APBHX_CTRL2);
-       writel(stat2, mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
+       err = readl(mxs_dma->base + HW_APBHX_CTRL2);
+       err &= (1 << (MXS_DMA_CHANNELS + chan)) | (1 << chan);
+
+       /*
+        * error status bit is in the upper 16 bits, error irq bit in the lower
+        * 16 bits. We transform it into a simpler error code:
+        * err: 0x00 = no error, 0x01 = TERMINATION, 0x02 = BUS_ERROR
+        */
+       err = (err >> (MXS_DMA_CHANNELS + chan)) + (err >> chan);
+
+       /* Clear error irq */
+       writel((1 << chan),
+                       mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
 
        /*
         * When both completion and error of termination bits set at the
         * same time, we do not take it as an error.  IOW, it only becomes
-        * an error we need to handle here in case of either it's (1) a bus
-        * error or (2) a termination error with no completion.
+        * an error we need to handle here when it is either a bus
+        * error or a termination error with no completion. 0x01 is termination
+        * error, so we can subtract err & completed to get the real error case.
         */
-       stat2 = ((stat2 >> MXS_DMA_CHANNELS) & stat2) | /* (1) */
-               (~(stat2 >> MXS_DMA_CHANNELS) & stat2 & ~stat1); /* (2) */
-
-       /* combine error and completion status for checking */
-       stat1 = (stat2 << MXS_DMA_CHANNELS) | stat1;
-       while (stat1) {
-               int channel = fls(stat1) - 1;
-               struct mxs_dma_chan *mxs_chan =
-                       &mxs_dma->mxs_chans[channel % MXS_DMA_CHANNELS];
-
-               if (channel >= MXS_DMA_CHANNELS) {
-                       dev_dbg(mxs_dma->dma_device.dev,
-                               "%s: error in channel %d\n", __func__,
-                               channel - MXS_DMA_CHANNELS);
-                       mxs_chan->status = DMA_ERROR;
-                       mxs_dma_reset_chan(mxs_chan);
-               } else {
-                       if (mxs_chan->flags & MXS_DMA_SG_LOOP)
-                               mxs_chan->status = DMA_IN_PROGRESS;
-                       else
-                               mxs_chan->status = DMA_SUCCESS;
-               }
+       err -= err & completed;
 
-               stat1 &= ~(1 << channel);
+       mxs_chan = &mxs_dma->mxs_chans[chan];
 
-               if (mxs_chan->status == DMA_SUCCESS)
-                       dma_cookie_complete(&mxs_chan->desc);
+       if (err) {
+               dev_dbg(mxs_dma->dma_device.dev,
+                       "%s: error in channel %d\n", __func__,
+                       chan);
+               mxs_chan->status = DMA_ERROR;
+               mxs_dma_reset_chan(mxs_chan);
+       } else if (mxs_chan->status != DMA_COMPLETE) {
+               if (mxs_chan->flags & MXS_DMA_SG_LOOP) {
+                       mxs_chan->status = DMA_IN_PROGRESS;
+                       if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE)
+                               writel(1, mxs_dma->base +
+                                       HW_APBHX_CHn_SEMA(mxs_dma, chan));
+               } else {
+                       mxs_chan->status = DMA_COMPLETE;
+               }
+       }
 
-               /* schedule tasklet on this channel */
-               tasklet_schedule(&mxs_chan->tasklet);
+       if (mxs_chan->status == DMA_COMPLETE) {
+               if (mxs_chan->reset)
+                       return IRQ_HANDLED;
+               dma_cookie_complete(&mxs_chan->desc);
        }
 
+       /* schedule tasklet on this channel */
+       tasklet_schedule(&mxs_chan->tasklet);
+
        return IRQ_HANDLED;
 }
 
@@ -523,6 +603,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 
        mxs_chan->status = DMA_IN_PROGRESS;
        mxs_chan->flags |= MXS_DMA_SG_LOOP;
+       mxs_chan->flags |= MXS_DMA_USE_SEMAPHORE;
 
        if (num_periods > NUM_CCW) {
                dev_err(mxs_dma->dma_device.dev,
@@ -554,6 +635,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
                ccw->bits |= CCW_IRQ;
                ccw->bits |= CCW_HALT_ON_TERM;
                ccw->bits |= CCW_TERM_FLUSH;
+               ccw->bits |= CCW_DEC_SEM;
                ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
                                MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
 
@@ -599,8 +681,24 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
                        dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
        struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+       struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+       u32 residue = 0;
+
+       if (mxs_chan->status == DMA_IN_PROGRESS &&
+                       mxs_chan->flags & MXS_DMA_SG_LOOP) {
+               struct mxs_dma_ccw *last_ccw;
+               u32 bar;
+
+               last_ccw = &mxs_chan->ccw[mxs_chan->desc_count - 1];
+               residue = last_ccw->xfer_bytes + last_ccw->bufaddr;
+
+               bar = readl(mxs_dma->base +
+                               HW_APBHX_CHn_BAR(mxs_dma, chan->chan_id));
+               residue -= bar;
+       }
 
-       dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
+       dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+                       residue);
 
        return mxs_chan->status;
 }
index ec3fc4fd9160e8aeddf16054cd405f35b43bfef7..2f66cf4e54fe367754378c3c8be213fe20bb8f64 100644 (file)
@@ -248,7 +248,7 @@ static enum dma_status omap_dma_tx_status(struct dma_chan *chan,
        unsigned long flags;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS || !txstate)
+       if (ret == DMA_COMPLETE || !txstate)
                return ret;
 
        spin_lock_irqsave(&c->vc.lock, flags);
index df8b10fd1726ed466d2b05e814a8feb6c711dfe3..cdf0483b8f2dfb8f746b786fd6bf84b0d5f6657b 100644 (file)
@@ -2268,6 +2268,8 @@ static void pl330_tasklet(unsigned long data)
                        list_move_tail(&desc->node, &pch->dmac->desc_pool);
                }
 
+               dma_descriptor_unmap(&desc->txd);
+
                if (callback) {
                        spin_unlock_irqrestore(&pch->lock, flags);
                        callback(callback_param);
@@ -2314,7 +2316,7 @@ bool pl330_filter(struct dma_chan *chan, void *param)
                return false;
 
        peri_id = chan->private;
-       return *peri_id == (unsigned)param;
+       return *peri_id == (unsigned long)param;
 }
 EXPORT_SYMBOL(pl330_filter);
 
@@ -2926,16 +2928,23 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
        amba_set_drvdata(adev, pdmac);
 
-       irq = adev->irq[0];
-       ret = request_irq(irq, pl330_irq_handler, 0,
-                       dev_name(&adev->dev), pi);
-       if (ret)
-               return ret;
+       for (i = 0; i < AMBA_NR_IRQS; i++) {
+               irq = adev->irq[i];
+               if (irq) {
+                       ret = devm_request_irq(&adev->dev, irq,
+                                              pl330_irq_handler, 0,
+                                              dev_name(&adev->dev), pi);
+                       if (ret)
+                               return ret;
+               } else {
+                       break;
+               }
+       }
 
        pi->pcfg.periph_id = adev->periphid;
        ret = pl330_add(pi);
        if (ret)
-               goto probe_err1;
+               return ret;
 
        INIT_LIST_HEAD(&pdmac->desc_pool);
        spin_lock_init(&pdmac->pool_lock);
@@ -3033,8 +3042,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
        return 0;
 probe_err3:
-       amba_set_drvdata(adev, NULL);
-
        /* Idle the DMAC */
        list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
                        chan.device_node) {
@@ -3048,8 +3055,6 @@ probe_err3:
        }
 probe_err2:
        pl330_del(pi);
-probe_err1:
-       free_irq(irq, pi);
 
        return ret;
 }
@@ -3059,7 +3064,6 @@ static int pl330_remove(struct amba_device *adev)
        struct dma_pl330_dmac *pdmac = amba_get_drvdata(adev);
        struct dma_pl330_chan *pch, *_p;
        struct pl330_info *pi;
-       int irq;
 
        if (!pdmac)
                return 0;
@@ -3068,7 +3072,6 @@ static int pl330_remove(struct amba_device *adev)
                of_dma_controller_free(adev->dev.of_node);
 
        dma_async_device_unregister(&pdmac->ddma);
-       amba_set_drvdata(adev, NULL);
 
        /* Idle the DMAC */
        list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
@@ -3086,9 +3089,6 @@ static int pl330_remove(struct amba_device *adev)
 
        pl330_del(pi);
 
-       irq = adev->irq[0];
-       free_irq(irq, pi);
-
        return 0;
 }
 
index e24b5ef486b50819957d02c03f475561bccac4df..8da48c6b2a38ccd76f3e127d53e4be5ab7be425c 100644 (file)
@@ -803,218 +803,6 @@ static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
        local_irq_restore(flags);
 }
 
-/**
- * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
- */
-static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
-                               struct ppc440spe_adma_chan *chan, int src_idx)
-{
-       struct dma_cdb *dma_hw_desc;
-       struct xor_cb *xor_hw_desc;
-
-       switch (chan->device->id) {
-       case PPC440SPE_DMA0_ID:
-       case PPC440SPE_DMA1_ID:
-               dma_hw_desc = desc->hw_desc;
-               /* May have 0, 1, 2, or 3 sources */
-               switch (dma_hw_desc->opc) {
-               case DMA_CDB_OPC_NO_OP:
-               case DMA_CDB_OPC_DFILL128:
-                       return 0;
-               case DMA_CDB_OPC_DCHECK128:
-                       if (unlikely(src_idx)) {
-                               printk(KERN_ERR "%s: try to get %d source for"
-                                   " DCHECK128\n", __func__, src_idx);
-                               BUG();
-                       }
-                       return le32_to_cpu(dma_hw_desc->sg1l);
-               case DMA_CDB_OPC_MULTICAST:
-               case DMA_CDB_OPC_MV_SG1_SG2:
-                       if (unlikely(src_idx > 2)) {
-                               printk(KERN_ERR "%s: try to get %d source from"
-                                   " DMA descr\n", __func__, src_idx);
-                               BUG();
-                       }
-                       if (src_idx) {
-                               if (le32_to_cpu(dma_hw_desc->sg1u) &
-                                   DMA_CUED_XOR_WIN_MSK) {
-                                       u8 region;
-
-                                       if (src_idx == 1)
-                                               return le32_to_cpu(
-                                                   dma_hw_desc->sg1l) +
-                                                       desc->unmap_len;
-
-                                       region = (le32_to_cpu(
-                                           dma_hw_desc->sg1u)) >>
-                                               DMA_CUED_REGION_OFF;
-
-                                       region &= DMA_CUED_REGION_MSK;
-                                       switch (region) {
-                                       case DMA_RXOR123:
-                                               return le32_to_cpu(
-                                                   dma_hw_desc->sg1l) +
-                                                       (desc->unmap_len << 1);
-                                       case DMA_RXOR124:
-                                               return le32_to_cpu(
-                                                   dma_hw_desc->sg1l) +
-                                                       (desc->unmap_len * 3);
-                                       case DMA_RXOR125:
-                                               return le32_to_cpu(
-                                                   dma_hw_desc->sg1l) +
-                                                       (desc->unmap_len << 2);
-                                       default:
-                                               printk(KERN_ERR
-                                                   "%s: try to"
-                                                   " get src3 for region %02x"
-                                                   "PPC440SPE_DESC_RXOR12?\n",
-                                                   __func__, region);
-                                               BUG();
-                                       }
-                               } else {
-                                       printk(KERN_ERR
-                                               "%s: try to get %d"
-                                               " source for non-cued descr\n",
-                                               __func__, src_idx);
-                                       BUG();
-                               }
-                       }
-                       return le32_to_cpu(dma_hw_desc->sg1l);
-               default:
-                       printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-                               __func__, dma_hw_desc->opc);
-                       BUG();
-               }
-               return le32_to_cpu(dma_hw_desc->sg1l);
-       case PPC440SPE_XOR_ID:
-               /* May have up to 16 sources */
-               xor_hw_desc = desc->hw_desc;
-               return xor_hw_desc->ops[src_idx].l;
-       }
-       return 0;
-}
-
-/**
- * ppc440spe_desc_get_dest_addr - extract the destination address from the
- * descriptor
- */
-static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
-                               struct ppc440spe_adma_chan *chan, int idx)
-{
-       struct dma_cdb *dma_hw_desc;
-       struct xor_cb *xor_hw_desc;
-
-       switch (chan->device->id) {
-       case PPC440SPE_DMA0_ID:
-       case PPC440SPE_DMA1_ID:
-               dma_hw_desc = desc->hw_desc;
-
-               if (likely(!idx))
-                       return le32_to_cpu(dma_hw_desc->sg2l);
-               return le32_to_cpu(dma_hw_desc->sg3l);
-       case PPC440SPE_XOR_ID:
-               xor_hw_desc = desc->hw_desc;
-               return xor_hw_desc->cbtal;
-       }
-       return 0;
-}
-
-/**
- * ppc440spe_desc_get_src_num - extract the number of source addresses from
- * the descriptor
- */
-static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
-                               struct ppc440spe_adma_chan *chan)
-{
-       struct dma_cdb *dma_hw_desc;
-       struct xor_cb *xor_hw_desc;
-
-       switch (chan->device->id) {
-       case PPC440SPE_DMA0_ID:
-       case PPC440SPE_DMA1_ID:
-               dma_hw_desc = desc->hw_desc;
-
-               switch (dma_hw_desc->opc) {
-               case DMA_CDB_OPC_NO_OP:
-               case DMA_CDB_OPC_DFILL128:
-                       return 0;
-               case DMA_CDB_OPC_DCHECK128:
-                       return 1;
-               case DMA_CDB_OPC_MV_SG1_SG2:
-               case DMA_CDB_OPC_MULTICAST:
-                       /*
-                        * Only for RXOR operations we have more than
-                        * one source
-                        */
-                       if (le32_to_cpu(dma_hw_desc->sg1u) &
-                           DMA_CUED_XOR_WIN_MSK) {
-                               /* RXOR op, there are 2 or 3 sources */
-                               if (((le32_to_cpu(dma_hw_desc->sg1u) >>
-                                   DMA_CUED_REGION_OFF) &
-                                     DMA_CUED_REGION_MSK) == DMA_RXOR12) {
-                                       /* RXOR 1-2 */
-                                       return 2;
-                               } else {
-                                       /* RXOR 1-2-3/1-2-4/1-2-5 */
-                                       return 3;
-                               }
-                       }
-                       return 1;
-               default:
-                       printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-                               __func__, dma_hw_desc->opc);
-                       BUG();
-               }
-       case PPC440SPE_XOR_ID:
-               /* up to 16 sources */
-               xor_hw_desc = desc->hw_desc;
-               return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
-       default:
-               BUG();
-       }
-       return 0;
-}
-
-/**
- * ppc440spe_desc_get_dst_num - get the number of destination addresses in
- * this descriptor
- */
-static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
-                               struct ppc440spe_adma_chan *chan)
-{
-       struct dma_cdb *dma_hw_desc;
-
-       switch (chan->device->id) {
-       case PPC440SPE_DMA0_ID:
-       case PPC440SPE_DMA1_ID:
-               /* May be 1 or 2 destinations */
-               dma_hw_desc = desc->hw_desc;
-               switch (dma_hw_desc->opc) {
-               case DMA_CDB_OPC_NO_OP:
-               case DMA_CDB_OPC_DCHECK128:
-                       return 0;
-               case DMA_CDB_OPC_MV_SG1_SG2:
-               case DMA_CDB_OPC_DFILL128:
-                       return 1;
-               case DMA_CDB_OPC_MULTICAST:
-                       if (desc->dst_cnt == 2)
-                               return 2;
-                       else
-                               return 1;
-               default:
-                       printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-                               __func__, dma_hw_desc->opc);
-                       BUG();
-               }
-       case PPC440SPE_XOR_ID:
-               /* Always only 1 destination */
-               return 1;
-       default:
-               BUG();
-       }
-       return 0;
-}
-
 /**
  * ppc440spe_desc_get_link - get the address of the descriptor that
  * follows this one
@@ -1707,43 +1495,6 @@ static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
        }
 }
 
-static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan,
-                                struct ppc440spe_adma_desc_slot *desc)
-{
-       u32 src_cnt, dst_cnt;
-       dma_addr_t addr;
-
-       /*
-        * get the number of sources & destination
-        * included in this descriptor and unmap
-        * them all
-        */
-       src_cnt = ppc440spe_desc_get_src_num(desc, chan);
-       dst_cnt = ppc440spe_desc_get_dst_num(desc, chan);
-
-       /* unmap destinations */
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               while (dst_cnt--) {
-                       addr = ppc440spe_desc_get_dest_addr(
-                               desc, chan, dst_cnt);
-                       dma_unmap_page(chan->device->dev,
-                                       addr, desc->unmap_len,
-                                       DMA_FROM_DEVICE);
-               }
-       }
-
-       /* unmap sources */
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               while (src_cnt--) {
-                       addr = ppc440spe_desc_get_src_addr(
-                               desc, chan, src_cnt);
-                       dma_unmap_page(chan->device->dev,
-                                       addr, desc->unmap_len,
-                                       DMA_TO_DEVICE);
-               }
-       }
-}
-
 /**
  * ppc440spe_adma_run_tx_complete_actions - call functions to be called
  * upon completion
@@ -1767,26 +1518,7 @@ static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
                        desc->async_tx.callback(
                                desc->async_tx.callback_param);
 
-               /* unmap dma addresses
-                * (unmap_single vs unmap_page?)
-                *
-                * actually, ppc's dma_unmap_page() functions are empty, so
-                * the following code is just for the sake of completeness
-                */
-               if (chan && chan->needs_unmap && desc->group_head &&
-                    desc->unmap_len) {
-                       struct ppc440spe_adma_desc_slot *unmap =
-                                                       desc->group_head;
-                       /* assume 1 slot per op always */
-                       u32 slot_count = unmap->slot_cnt;
-
-                       /* Run through the group list and unmap addresses */
-                       for (i = 0; i < slot_count; i++) {
-                               BUG_ON(!unmap);
-                               ppc440spe_adma_unmap(chan, unmap);
-                               unmap = unmap->hw_next;
-                       }
-               }
+               dma_descriptor_unmap(&desc->async_tx);
        }
 
        /* run dependent operations */
@@ -3893,7 +3625,7 @@ static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
 
        ppc440spe_chan = to_ppc440spe_adma_chan(chan);
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        ppc440spe_adma_slot_cleanup(ppc440spe_chan);
index 461a91ab70bb4feca82cd27c1582f81c2905bcde..ab26d46bbe1598434625979abeb488d5199992d9 100644 (file)
@@ -436,7 +436,7 @@ static enum dma_status sa11x0_dma_tx_status(struct dma_chan *chan,
        enum dma_status ret;
 
        ret = dma_cookie_status(&c->vc.chan, cookie, state);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        if (!state)
index d94ab592cc1bb21b92c851debe381609b599fa48..2e7b394def8058e4d1216ad8a07c785db4d87048 100644 (file)
@@ -724,7 +724,7 @@ static enum dma_status shdma_tx_status(struct dma_chan *chan,
         * If we don't find cookie on the queue, it has been aborted and we have
         * to report error
         */
-       if (status != DMA_SUCCESS) {
+       if (status != DMA_COMPLETE) {
                struct shdma_desc *sdesc;
                status = DMA_ERROR;
                list_for_each_entry(sdesc, &schan->ld_queue, node)
index 1069e8869f20762928ecbbe509b2ed294f82ae35..0d765c0e21ec9de8cb4e25ff66d5fd53ee78d95d 100644 (file)
@@ -685,7 +685,7 @@ MODULE_DEVICE_TABLE(of, sh_dmae_of_match);
 static int sh_dmae_probe(struct platform_device *pdev)
 {
        const struct sh_dmae_pdata *pdata;
-       unsigned long irqflags = IRQF_DISABLED,
+       unsigned long irqflags = 0,
                chan_flag[SH_DMAE_MAX_CHANNELS] = {};
        int errirq, chan_irq[SH_DMAE_MAX_CHANNELS];
        int err, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
@@ -838,7 +838,7 @@ static int sh_dmae_probe(struct platform_device *pdev)
                                    IORESOURCE_IRQ_SHAREABLE)
                                        chan_flag[irq_cnt] = IRQF_SHARED;
                                else
-                                       chan_flag[irq_cnt] = IRQF_DISABLED;
+                                       chan_flag[irq_cnt] = 0;
                                dev_dbg(&pdev->dev,
                                        "Found IRQ %d for channel %d\n",
                                        i, irq_cnt);
index 82d2b97ad942f96f2064c0ac58b11141fb85b54c..b8c031b7de4e045d22cfa0e7d495380bc94ed75f 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/log2.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/err.h>
@@ -2626,7 +2627,7 @@ static enum dma_status d40_tx_status(struct dma_chan *chan,
        }
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret != DMA_SUCCESS)
+       if (ret != DMA_COMPLETE)
                dma_set_residue(txstate, stedma40_residue(chan));
 
        if (d40_is_paused(d40c))
@@ -2796,8 +2797,8 @@ static int d40_set_runtime_config(struct dma_chan *chan,
            src_addr_width >  DMA_SLAVE_BUSWIDTH_8_BYTES   ||
            dst_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED ||
            dst_addr_width >  DMA_SLAVE_BUSWIDTH_8_BYTES   ||
-           ((src_addr_width > 1) && (src_addr_width & 1)) ||
-           ((dst_addr_width > 1) && (dst_addr_width & 1)))
+           !is_power_of_2(src_addr_width) ||
+           !is_power_of_2(dst_addr_width))
                return -EINVAL;
 
        cfg->src_info.data_width = src_addr_width;
index 5d4986e5f5fa6b21423084b688bd0a8afbba0c2e..73654e33f13b98c66ebce532646056ecdce79c61 100644 (file)
@@ -570,7 +570,7 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc,
 
        list_del(&sgreq->node);
        if (sgreq->last_sg) {
-               dma_desc->dma_status = DMA_SUCCESS;
+               dma_desc->dma_status = DMA_COMPLETE;
                dma_cookie_complete(&dma_desc->txd);
                if (!dma_desc->cb_count)
                        list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
@@ -768,7 +768,7 @@ static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
        unsigned int residual;
 
        ret = dma_cookie_status(dc, cookie, txstate);
-       if (ret == DMA_SUCCESS)
+       if (ret == DMA_COMPLETE)
                return ret;
 
        spin_lock_irqsave(&tdc->lock, flags);
@@ -1018,7 +1018,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
        return &dma_desc->txd;
 }
 
-struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
+static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
        struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
        size_t period_len, enum dma_transfer_direction direction,
        unsigned long flags, void *context)
index 28af214fce049db85fc02fb903a748fdbef6e0a1..4506a7b4f972319c761ff4ee459a0a38bb7139dd 100644 (file)
@@ -154,38 +154,6 @@ static bool __td_dma_done_ack(struct timb_dma_chan *td_chan)
        return done;
 }
 
-static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc,
-       bool single)
-{
-       dma_addr_t addr;
-       int len;
-
-       addr = (dma_desc[7] << 24) | (dma_desc[6] << 16) | (dma_desc[5] << 8) |
-               dma_desc[4];
-
-       len = (dma_desc[3] << 8) | dma_desc[2];
-
-       if (single)
-               dma_unmap_single(chan2dev(&td_chan->chan), addr, len,
-                       DMA_TO_DEVICE);
-       else
-               dma_unmap_page(chan2dev(&td_chan->chan), addr, len,
-                       DMA_TO_DEVICE);
-}
-
-static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single)
-{
-       struct timb_dma_chan *td_chan = container_of(td_desc->txd.chan,
-               struct timb_dma_chan, chan);
-       u8 *descs;
-
-       for (descs = td_desc->desc_list; ; descs += TIMB_DMA_DESC_SIZE) {
-               __td_unmap_desc(td_chan, descs, single);
-               if (descs[0] & 0x02)
-                       break;
-       }
-}
-
 static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
        struct scatterlist *sg, bool last)
 {
@@ -293,10 +261,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
 
        list_move(&td_desc->desc_node, &td_chan->free_list);
 
-       if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
-               __td_unmap_descs(td_desc,
-                       txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE);
-
+       dma_descriptor_unmap(txd);
        /*
         * The API requires that no submissions are done from a
         * callback, so we don't need to drop the lock here
index 71e8e775189e0df5568d474ea00157a2675f9260..bae6c29f5502ab951f7926bdf621977703bc96b1 100644 (file)
@@ -419,30 +419,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
        list_splice_init(&desc->tx_list, &dc->free_list);
        list_move(&desc->desc_node, &dc->free_list);
 
-       if (!ds) {
-               dma_addr_t dmaaddr;
-               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                       dmaaddr = is_dmac64(dc) ?
-                               desc->hwdesc.DAR : desc->hwdesc32.DAR;
-                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                               dma_unmap_single(chan2parent(&dc->chan),
-                                       dmaaddr, desc->len, DMA_FROM_DEVICE);
-                       else
-                               dma_unmap_page(chan2parent(&dc->chan),
-                                       dmaaddr, desc->len, DMA_FROM_DEVICE);
-               }
-               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                       dmaaddr = is_dmac64(dc) ?
-                               desc->hwdesc.SAR : desc->hwdesc32.SAR;
-                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                               dma_unmap_single(chan2parent(&dc->chan),
-                                       dmaaddr, desc->len, DMA_TO_DEVICE);
-                       else
-                               dma_unmap_page(chan2parent(&dc->chan),
-                                       dmaaddr, desc->len, DMA_TO_DEVICE);
-               }
-       }
-
+       dma_descriptor_unmap(txd);
        /*
         * The API requires that no submissions are done from a
         * callback, so we don't need to drop the lock here
@@ -962,8 +939,8 @@ txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
        enum dma_status ret;
 
        ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_SUCCESS)
-               return DMA_SUCCESS;
+       if (ret == DMA_COMPLETE)
+               return DMA_COMPLETE;
 
        spin_lock_bh(&dc->lock);
        txx9dmac_scan_descriptors(dc);
index 7dd4461502940952ebf6323dbb04e1af7e6a5230..4e10b10d3ddde47332f525285e1c549bd9a2c22f 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/acpi_gpio.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
+#include <linux/acpi.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/gpio.h>
index 43959edd4291193a538449c1d4f75bba8d30ea8a..dfff0907f70e53643093c3f6aa8d9258ab8dc5d5 100644 (file)
@@ -196,7 +196,7 @@ static bool intel_dsm_pci_probe(struct pci_dev *pdev)
        acpi_handle dhandle;
        int ret;
 
-       dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       dhandle = ACPI_HANDLE(&pdev->dev);
        if (!dhandle)
                return false;
 
index 1b2f41c3f19122c731356aff91f3b490959bde60..6d69a9bad86545c6a8cfc8e8ff86480d462c2132 100644 (file)
@@ -638,7 +638,7 @@ static void intel_didl_outputs(struct drm_device *dev)
        u32 temp;
        int i = 0;
 
-       handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+       handle = ACPI_HANDLE(&dev->pdev->dev);
        if (!handle || acpi_bus_get_device(handle, &acpi_dev))
                return;
 
index e286e132c7e7d744d88d99522cf2ce733b4899a6..129120473f6c67bb0d90fcc366e3f140c558f79d 100644 (file)
@@ -116,7 +116,7 @@ mxm_shadow_dsm(struct nouveau_mxm *mxm, u8 version)
        acpi_handle handle;
        int ret;
 
-       handle = DEVICE_ACPI_HANDLE(&device->pdev->dev);
+       handle = ACPI_HANDLE(&device->pdev->dev);
        if (!handle)
                return false;
 
index 07273a2ae62f2df8dd2b0f41916feeb4d89bd3cd..95c740454049ad1b4a4cf23c2bc63d7038c7a362 100644 (file)
@@ -256,7 +256,7 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev)
        acpi_handle dhandle;
        int retval = 0;
 
-       dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       dhandle = ACPI_HANDLE(&pdev->dev);
        if (!dhandle)
                return false;
 
@@ -414,7 +414,7 @@ bool nouveau_acpi_rom_supported(struct pci_dev *pdev)
        if (!nouveau_dsm_priv.dsm_detected && !nouveau_dsm_priv.optimus_detected)
                return false;
 
-       dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       dhandle = ACPI_HANDLE(&pdev->dev);
        if (!dhandle)
                return false;
 
@@ -448,7 +448,7 @@ nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector)
                return NULL;
        }
 
-       handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+       handle = ACPI_HANDLE(&dev->pdev->dev);
        if (!handle)
                return NULL;
 
index 10f98c7742d8c87289e8bbf677857c781fcebbee..98a9074b306b640644f576e928d4e28701495bb4 100644 (file)
@@ -369,7 +369,7 @@ int radeon_atif_handler(struct radeon_device *rdev,
                return NOTIFY_DONE;
 
        /* Check pending SBIOS requests */
-       handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+       handle = ACPI_HANDLE(&rdev->pdev->dev);
        count = radeon_atif_get_sbios_requests(handle, &req);
 
        if (count <= 0)
@@ -556,7 +556,7 @@ int radeon_acpi_pcie_notify_device_ready(struct radeon_device *rdev)
        struct radeon_atcs *atcs = &rdev->atcs;
 
        /* Get the device handle */
-       handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+       handle = ACPI_HANDLE(&rdev->pdev->dev);
        if (!handle)
                return -EINVAL;
 
@@ -596,7 +596,7 @@ int radeon_acpi_pcie_performance_request(struct radeon_device *rdev,
        u32 retry = 3;
 
        /* Get the device handle */
-       handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+       handle = ACPI_HANDLE(&rdev->pdev->dev);
        if (!handle)
                return -EINVAL;
 
@@ -699,7 +699,7 @@ int radeon_acpi_init(struct radeon_device *rdev)
        int ret;
 
        /* Get the device handle */
-       handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+       handle = ACPI_HANDLE(&rdev->pdev->dev);
 
        /* No need to proceed if we're sure that ATIF is not supported */
        if (!ASIC_IS_AVIVO(rdev) || !rdev->bios || !handle)
index 6153ec18943aabb7034010ff1c1abc2ba0a761fe..9d302eaeea1587b6fdb5439119b6f45c0de961db 100644 (file)
@@ -8,8 +8,7 @@
  */
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
+#include <linux/acpi.h>
 #include <linux/pci.h>
 
 #include "radeon_acpi.h"
@@ -447,7 +446,7 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
        acpi_handle dhandle, atpx_handle;
        acpi_status status;
 
-       dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       dhandle = ACPI_HANDLE(&pdev->dev);
        if (!dhandle)
                return false;
 
@@ -493,7 +492,7 @@ static int radeon_atpx_init(void)
  */
 static int radeon_atpx_get_client_id(struct pci_dev *pdev)
 {
-       if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+       if (radeon_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
                return VGA_SWITCHEROO_IGD;
        else
                return VGA_SWITCHEROO_DIS;
index c155d6f3fa68cad15102af67bfd348677c74d6fa..b3633d9a531703a1cd4189c061a0907e89ee1085 100644 (file)
@@ -185,7 +185,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev)
                return false;
 
        while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
-               dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+               dhandle = ACPI_HANDLE(&pdev->dev);
                if (!dhandle)
                        continue;
 
index ae48d18ee315819d2b8fa359fc4c0269665f6ec5..5f7e55f4b7f052e29f754f78cabe8dd6a4593fb4 100644 (file)
@@ -1008,7 +1008,7 @@ static int i2c_hid_probe(struct i2c_client *client,
        hid->hid_get_raw_report = i2c_hid_get_raw_report;
        hid->hid_output_raw_report = i2c_hid_output_raw_report;
        hid->dev.parent = &client->dev;
-       ACPI_HANDLE_SET(&hid->dev, ACPI_HANDLE(&client->dev));
+       ACPI_COMPANION_SET(&hid->dev, ACPI_COMPANION(&client->dev));
        hid->bus = BUS_I2C;
        hid->version = le16_to_cpu(ihid->hdesc.bcdVersion);
        hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID);
index 5923cfa390c86de6528559c229ace0a4b39b402c..d74c0b34248ea6c38472cc401571d8f519844c4d 100644 (file)
@@ -615,6 +615,22 @@ void i2c_unlock_adapter(struct i2c_adapter *adapter)
 }
 EXPORT_SYMBOL_GPL(i2c_unlock_adapter);
 
+/*
+ * Pick the device name for a freshly created I2C client.
+ *
+ * A client that has an ACPI companion is named "i2c-<acpi device name>".
+ * Any other client is named "<adapter id>-<address>", with the address
+ * printed as four hex digits.
+ */
+static void i2c_dev_set_name(struct i2c_adapter *adap,
+                            struct i2c_client *client)
+{
+       struct acpi_device *companion = ACPI_COMPANION(&client->dev);
+
+       if (!companion) {
+               /*
+                * 10-bit clients get an arbitrary 0xa000 offset so their
+                * names cannot collide with those of 7-bit clients.
+                */
+               dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
+                            client->addr | ((client->flags & I2C_CLIENT_TEN)
+                                            ? 0xa000 : 0));
+               return;
+       }
+
+       dev_set_name(&client->dev, "i2c-%s", acpi_dev_name(companion));
+}
+
+
 /**
  * i2c_new_device - instantiate an i2c device
  * @adap: the adapter managing the device
@@ -671,12 +687,9 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
        client->dev.bus = &i2c_bus_type;
        client->dev.type = &i2c_client_type;
        client->dev.of_node = info->of_node;
-       ACPI_HANDLE_SET(&client->dev, info->acpi_node.handle);
+       ACPI_COMPANION_SET(&client->dev, info->acpi_node.companion);
 
-       /* For 10-bit clients, add an arbitrary offset to avoid collisions */
-       dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
-                    client->addr | ((client->flags & I2C_CLIENT_TEN)
-                                    ? 0xa000 : 0));
+       i2c_dev_set_name(adap, client);
        status = device_register(&client->dev);
        if (status)
                goto out_err;
@@ -1100,7 +1113,7 @@ static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
                return AE_OK;
 
        memset(&info, 0, sizeof(info));
-       info.acpi_node.handle = handle;
+       info.acpi_node.companion = adev;
        info.irq = -1;
 
        INIT_LIST_HEAD(&resource_list);
index 140c8ef505291129d6299d2b4d4931d3b728589e..d9e1f7ccfe6f086df549160a88ca191e52fca32f 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright (C) 2006 Hannes Reinecke
  */
 
+#include <linux/acpi.h>
 #include <linux/ata.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -19,8 +20,6 @@
 #include <linux/dmi.h>
 #include <linux/module.h>
 
-#include <acpi/acpi_bus.h>
-
 #define REGS_PER_GTF           7
 
 struct GTM_buffer {
@@ -128,7 +127,7 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle,
 
        DEBPRINT("ENTER: pci %02x:%02x.%01x\n", bus, devnum, func);
 
-       dev_handle = DEVICE_ACPI_HANDLE(dev);
+       dev_handle = ACPI_HANDLE(dev);
        if (!dev_handle) {
                DEBPRINT("no acpi handle for device\n");
                goto err;
index 3226ce98fb184df9c0c5666129c1b699cbcc5027..cbd4e9abc47e8f47f512915d224f916166921110 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * intel_idle.c - native hardware idle loop for modern Intel processors
  *
- * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2013, Intel Corporation.
  * Len Brown <len.brown@intel.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -329,6 +329,22 @@ static struct cpuidle_state atom_cstates[] __initdata = {
        {
                .enter = NULL }
 };
+/*
+ * C-state table referenced by idle_cpu_avn.
+ *
+ * Each entry encodes its MWAIT hint through MWAIT2flg() and repeats the
+ * same hint in .desc; the two must agree.  The C6 entry uses hint 0x51.
+ *
+ * Only two states are populated.  The array is sized CPUIDLE_STATE_MAX,
+ * so the remaining entries are zero-initialized and their .enter is NULL,
+ * the same marker the other tables spell out as an explicit terminator
+ * (presumably what ends the table walk -- confirm against the init code).
+ */
+static struct cpuidle_state avn_cstates[CPUIDLE_STATE_MAX] = {
+       {
+               .name = "C1-AVN",
+               .desc = "MWAIT 0x00",
+               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 2,
+               .target_residency = 2,
+               .enter = &intel_idle },
+       {
+               .name = "C6-AVN",
+               .desc = "MWAIT 0x51",
+               .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TIME_VALID |
+                        CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 15,
+               .target_residency = 45,
+               .enter = &intel_idle },
+};
 
 /**
  * intel_idle
@@ -462,6 +478,11 @@ static const struct idle_cpu idle_cpu_hsw = {
        .disable_promotion_to_c1e = true,
 };
 
+/*
+ * Settings for the CPU model matched by ICPU(0x4D, idle_cpu_avn): use the
+ * avn_cstates table and set disable_promotion_to_c1e.
+ */
+static const struct idle_cpu idle_cpu_avn = {
+       .state_table = avn_cstates,
+       .disable_promotion_to_c1e = true,
+};
+
 #define ICPU(model, cpu) \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
 
@@ -483,6 +504,7 @@ static const struct x86_cpu_id intel_idle_ids[] = {
        ICPU(0x3f, idle_cpu_hsw),
        ICPU(0x45, idle_cpu_hsw),
        ICPU(0x46, idle_cpu_hsw),
+       ICPU(0x4D, idle_cpu_avn),
        {}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
index 8766eabb0014a075e5ace35860806fb3bf31be3f..b6b7a2866c9e99533afd59a0a142712f58febe63 100644 (file)
@@ -112,7 +112,7 @@ static inline int speed_max(struct mddev *mddev)
 
 static struct ctl_table_header *raid_table_header;
 
-static ctl_table raid_table[] = {
+static struct ctl_table raid_table[] = {
        {
                .procname       = "speed_limit_min",
                .data           = &sysctl_speed_limit_min,
@@ -130,7 +130,7 @@ static ctl_table raid_table[] = {
        { }
 };
 
-static ctl_table raid_dir_table[] = {
+static struct ctl_table raid_dir_table[] = {
        {
                .procname       = "raid",
                .maxlen         = 0,
@@ -140,7 +140,7 @@ static ctl_table raid_dir_table[] = {
        { }
 };
 
-static ctl_table raid_root_table[] = {
+static struct ctl_table raid_root_table[] = {
        {
                .procname       = "dev",
                .maxlen         = 0,
@@ -562,11 +562,19 @@ static struct mddev * mddev_find(dev_t unit)
        goto retry;
 }
 
-static inline int mddev_lock(struct mddev * mddev)
+static inline int __must_check mddev_lock(struct mddev * mddev)
 {
        return mutex_lock_interruptible(&mddev->reconfig_mutex);
 }
 
+/* Take reconfig_mutex with plain mutex_lock(), for situations where
+ * failure due to interrupts is not acceptable.  Returns nothing to check.
+ */
+static inline void mddev_lock_nointr(struct mddev * mddev)
+{
+       mutex_lock(&mddev->reconfig_mutex);
+}
+
 static inline int mddev_is_locked(struct mddev *mddev)
 {
        return mutex_is_locked(&mddev->reconfig_mutex);
@@ -2978,7 +2986,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
                for_each_mddev(mddev, tmp) {
                        struct md_rdev *rdev2;
 
-                       mddev_lock(mddev);
+                       mddev_lock_nointr(mddev);
                        rdev_for_each(rdev2, mddev)
                                if (rdev->bdev == rdev2->bdev &&
                                    rdev != rdev2 &&
@@ -2994,7 +3002,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
                                break;
                        }
                }
-               mddev_lock(my_mddev);
+               mddev_lock_nointr(my_mddev);
                if (overlap) {
                        /* Someone else could have slipped in a size
                         * change here, but doing so is just silly.
@@ -3580,6 +3588,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
                mddev->in_sync = 1;
                del_timer_sync(&mddev->safemode_timer);
        }
+       blk_set_stacking_limits(&mddev->queue->limits);
        pers->run(mddev);
        set_bit(MD_CHANGE_DEVS, &mddev->flags);
        mddev_resume(mddev);
@@ -5258,7 +5267,7 @@ static void __md_stop_writes(struct mddev *mddev)
 
 void md_stop_writes(struct mddev *mddev)
 {
-       mddev_lock(mddev);
+       mddev_lock_nointr(mddev);
        __md_stop_writes(mddev);
        mddev_unlock(mddev);
 }
@@ -5291,20 +5300,35 @@ EXPORT_SYMBOL_GPL(md_stop);
 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 {
        int err = 0;
+       int did_freeze = 0;
+
+       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+               did_freeze = 1;
+               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+               md_wakeup_thread(mddev->thread);
+       }
+       if (mddev->sync_thread) {
+               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+               /* Thread might be blocked waiting for metadata update
+                * which will now never happen */
+               wake_up_process(mddev->sync_thread->tsk);
+       }
+       mddev_unlock(mddev);
+       wait_event(resync_wait, mddev->sync_thread == NULL);
+       mddev_lock_nointr(mddev);
+
        mutex_lock(&mddev->open_mutex);
-       if (atomic_read(&mddev->openers) > !!bdev) {
+       if (atomic_read(&mddev->openers) > !!bdev ||
+           mddev->sync_thread ||
+           (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
                printk("md: %s still in use.\n",mdname(mddev));
+               if (did_freeze) {
+                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                       md_wakeup_thread(mddev->thread);
+               }
                err = -EBUSY;
                goto out;
        }
-       if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-               /* Someone opened the device since we flushed it
-                * so page cache could be dirty and it is too late
-                * to flush.  So abort
-                */
-               mutex_unlock(&mddev->open_mutex);
-               return -EBUSY;
-       }
        if (mddev->pers) {
                __md_stop_writes(mddev);
 
@@ -5315,7 +5339,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
                set_disk_ro(mddev->gendisk, 1);
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                sysfs_notify_dirent_safe(mddev->sysfs_state);
-               err = 0;        
+               err = 0;
        }
 out:
        mutex_unlock(&mddev->open_mutex);
@@ -5331,20 +5355,34 @@ static int do_md_stop(struct mddev * mddev, int mode,
 {
        struct gendisk *disk = mddev->gendisk;
        struct md_rdev *rdev;
+       int did_freeze = 0;
+
+       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+               did_freeze = 1;
+               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+               md_wakeup_thread(mddev->thread);
+       }
+       if (mddev->sync_thread) {
+               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+               /* Thread might be blocked waiting for metadata update
+                * which will now never happen */
+               wake_up_process(mddev->sync_thread->tsk);
+       }
+       mddev_unlock(mddev);
+       wait_event(resync_wait, mddev->sync_thread == NULL);
+       mddev_lock_nointr(mddev);
 
        mutex_lock(&mddev->open_mutex);
        if (atomic_read(&mddev->openers) > !!bdev ||
-           mddev->sysfs_active) {
+           mddev->sysfs_active ||
+           mddev->sync_thread ||
+           (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
                printk("md: %s still in use.\n",mdname(mddev));
                mutex_unlock(&mddev->open_mutex);
-               return -EBUSY;
-       }
-       if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-               /* Someone opened the device since we flushed it
-                * so page cache could be dirty and it is too late
-                * to flush.  So abort
-                */
-               mutex_unlock(&mddev->open_mutex);
+               if (did_freeze) {
+                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                       md_wakeup_thread(mddev->thread);
+               }
                return -EBUSY;
        }
        if (mddev->pers) {
@@ -6551,7 +6589,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
                                wait_event(mddev->sb_wait,
                                           !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
                                           !test_bit(MD_CHANGE_PENDING, &mddev->flags));
-                               mddev_lock(mddev);
+                               mddev_lock_nointr(mddev);
                        }
                } else {
                        err = -EROFS;
@@ -7361,9 +7399,6 @@ void md_do_sync(struct md_thread *thread)
                mddev->curr_resync = 2;
 
        try_again:
-               if (kthread_should_stop())
-                       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
                if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
                        goto skip;
                for_each_mddev(mddev2, tmp) {
@@ -7388,7 +7423,7 @@ void md_do_sync(struct md_thread *thread)
                                 * be caught by 'softlockup'
                                 */
                                prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
-                               if (!kthread_should_stop() &&
+                               if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
                                    mddev2->curr_resync >= mddev->curr_resync) {
                                        printk(KERN_INFO "md: delaying %s of %s"
                                               " until %s has finished (they"
@@ -7464,7 +7499,7 @@ void md_do_sync(struct md_thread *thread)
        last_check = 0;
 
        if (j>2) {
-               printk(KERN_INFO 
+               printk(KERN_INFO
                       "md: resuming %s of %s from checkpoint.\n",
                       desc, mdname(mddev));
                mddev->curr_resync = j;
@@ -7501,7 +7536,8 @@ void md_do_sync(struct md_thread *thread)
                        sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                }
 
-               while (j >= mddev->resync_max && !kthread_should_stop()) {
+               while (j >= mddev->resync_max &&
+                      !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
                        /* As this condition is controlled by user-space,
                         * we can block indefinitely, so use '_interruptible'
                         * to avoid triggering warnings.
@@ -7509,17 +7545,18 @@ void md_do_sync(struct md_thread *thread)
                        flush_signals(current); /* just in case */
                        wait_event_interruptible(mddev->recovery_wait,
                                                 mddev->resync_max > j
-                                                || kthread_should_stop());
+                                                || test_bit(MD_RECOVERY_INTR,
+                                                            &mddev->recovery));
                }
 
-               if (kthread_should_stop())
-                       goto interrupted;
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       break;
 
                sectors = mddev->pers->sync_request(mddev, j, &skipped,
                                                  currspeed < speed_min(mddev));
                if (sectors == 0) {
                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                       goto out;
+                       break;
                }
 
                if (!skipped) { /* actual IO requested */
@@ -7556,10 +7593,8 @@ void md_do_sync(struct md_thread *thread)
                        last_mark = next;
                }
 
-
-               if (kthread_should_stop())
-                       goto interrupted;
-
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       break;
 
                /*
                 * this loop exits only if either when we are slower than
@@ -7582,11 +7617,12 @@ void md_do_sync(struct md_thread *thread)
                        }
                }
        }
-       printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
+       printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
+              test_bit(MD_RECOVERY_INTR, &mddev->recovery)
+              ? "interrupted" : "done");
        /*
         * this also signals 'finished resyncing' to md_stop
         */
- out:
        blk_finish_plug(&plug);
        wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
@@ -7640,16 +7676,6 @@ void md_do_sync(struct md_thread *thread)
        set_bit(MD_RECOVERY_DONE, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
        return;
-
- interrupted:
-       /*
-        * got a signal, exit.
-        */
-       printk(KERN_INFO
-              "md: md_do_sync() got signal ... exiting\n");
-       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-       goto out;
-
 }
 EXPORT_SYMBOL_GPL(md_do_sync);
 
@@ -7894,6 +7920,7 @@ void md_reap_sync_thread(struct mddev *mddev)
 
        /* resync has finished, collect result */
        md_unregister_thread(&mddev->sync_thread);
+       wake_up(&resync_wait);
        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
                /* success...*/
index af6681b19776d2f695030452a9d6ab42821ed0df..1e5a540995e932852df5ff484a96bfcc8636a432 100644 (file)
@@ -66,7 +66,8 @@
  */
 static int max_queued_requests = 1024;
 
-static void allow_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector);
 static void lower_barrier(struct r1conf *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -84,10 +85,12 @@ static void r1bio_pool_free(void *r1_bio, void *data)
 }
 
 #define RESYNC_BLOCK_SIZE (64*1024)
-//#define RESYNC_BLOCK_SIZE PAGE_SIZE
+#define RESYNC_DEPTH 32
 #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
-#define RESYNC_WINDOW (2048*1024)
+#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
+#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
+#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
 
 static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -225,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
        struct bio *bio = r1_bio->master_bio;
        int done;
        struct r1conf *conf = r1_bio->mddev->private;
+       sector_t start_next_window = r1_bio->start_next_window;
+       sector_t bi_sector = bio->bi_sector;
 
        if (bio->bi_phys_segments) {
                unsigned long flags;
@@ -232,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio)
                bio->bi_phys_segments--;
                done = (bio->bi_phys_segments == 0);
                spin_unlock_irqrestore(&conf->device_lock, flags);
+               /*
+                * make_request() might be waiting for
+                * bi_phys_segments to decrease
+                */
+               wake_up(&conf->wait_barrier);
        } else
                done = 1;
 
@@ -243,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
                 */
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bi_sector);
        }
 }
 
@@ -814,8 +824,6 @@ static void flush_pending_writes(struct r1conf *conf)
  *    there is no normal IO happeing.  It must arrange to call
  *    lower_barrier when the particular background IO completes.
  */
-#define RESYNC_DEPTH 32
-
 static void raise_barrier(struct r1conf *conf)
 {
        spin_lock_irq(&conf->resync_lock);
@@ -827,9 +835,19 @@ static void raise_barrier(struct r1conf *conf)
        /* block any new IO from starting */
        conf->barrier++;
 
-       /* Now wait for all pending IO to complete */
+       /* We must wait under any of these conditions:
+        * A: while the array is in the frozen state.
+        * B: while barrier >= RESYNC_DEPTH, meaning resync has reached
+        *    the maximum allowed count.
+        * C: while next_resync + RESYNC_SECTORS > start_next_window, meaning
+        *    the next resync would reach into the window in which normal
+        *    bios are being handled.
+        */
        wait_event_lock_irq(conf->wait_barrier,
-                           !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                           !conf->array_frozen &&
+                           conf->barrier < RESYNC_DEPTH &&
+                           (conf->start_next_window >=
+                            conf->next_resync + RESYNC_SECTORS),
                            conf->resync_lock);
 
        spin_unlock_irq(&conf->resync_lock);
@@ -845,10 +863,33 @@ static void lower_barrier(struct r1conf *conf)
        wake_up(&conf->wait_barrier);
 }
 
-static void wait_barrier(struct r1conf *conf)
+static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
 {
+       bool wait = false;      /* default: no need to wait */
+
+       if (conf->array_frozen || !bio)
+               wait = true;    /* frozen array, or caller passed no bio */
+       else if (conf->barrier && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync < RESYNC_WINDOW_SECTORS)
+                       wait = true;    /* resync is still within its first window */
+               else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
+                               >= bio_end_sector(bio)) ||
+                        (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                               <= bio->bi_sector))
+                       wait = false;   /* write ends behind, or starts far ahead of, resync */
+               else
+                       wait = true;    /* write falls in the zone around next_resync */
+       }
+
+       return wait;
+}
+
+static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
+{
+       sector_t sector = 0;
+
        spin_lock_irq(&conf->resync_lock);
-       if (conf->barrier) {
+       if (need_to_wait_for_sync(conf, bio)) {
                conf->nr_waiting++;
                /* Wait for the barrier to drop.
                 * However if there are already pending
@@ -860,22 +901,67 @@ static void wait_barrier(struct r1conf *conf)
                 * count down.
                 */
                wait_event_lock_irq(conf->wait_barrier,
-                                   !conf->barrier ||
-                                   (conf->nr_pending &&
+                                   !conf->array_frozen &&
+                                   (!conf->barrier ||
+                                   ((conf->start_next_window <
+                                     conf->next_resync + RESYNC_SECTORS) &&
                                     current->bio_list &&
-                                    !bio_list_empty(current->bio_list)),
+                                    !bio_list_empty(current->bio_list))),
                                    conf->resync_lock);
                conf->nr_waiting--;
        }
+
+       if (bio && bio_data_dir(bio) == WRITE) {
+               if (conf->next_resync + NEXT_NORMALIO_DISTANCE
+                   <= bio->bi_sector) {
+                       if (conf->start_next_window == MaxSector)
+                               conf->start_next_window =
+                                       conf->next_resync +
+                                       NEXT_NORMALIO_DISTANCE;
+
+                       if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
+                           <= bio->bi_sector)
+                               conf->next_window_requests++;
+                       else
+                               conf->current_window_requests++;
+               }
+               if (bio->bi_sector >= conf->start_next_window)
+                       sector = conf->start_next_window;
+       }
+
        conf->nr_pending++;
        spin_unlock_irq(&conf->resync_lock);
+       return sector;
 }
 
-static void allow_barrier(struct r1conf *conf)
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+                         sector_t bi_sector)
 {
        unsigned long flags;
+
        spin_lock_irqsave(&conf->resync_lock, flags);
        conf->nr_pending--;
+       if (start_next_window) {        /* 0 (as passed by close_sync) skips window accounting */
+               if (start_next_window == conf->start_next_window) {
+                       if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+                           <= bi_sector)
+                               conf->next_window_requests--;
+                       else
+                               conf->current_window_requests--;
+               } else
+                       conf->current_window_requests--;
+
+               if (!conf->current_window_requests) {   /* current window drained */
+                       if (conf->next_window_requests) {       /* promote the next window */
+                               conf->current_window_requests =
+                                       conf->next_window_requests;
+                               conf->next_window_requests = 0;
+                               conf->start_next_window +=
+                                       NEXT_NORMALIO_DISTANCE;
+                       } else
+                               conf->start_next_window = MaxSector;    /* nothing left to track */
+               }
+       }
        spin_unlock_irqrestore(&conf->resync_lock, flags);
        wake_up(&conf->wait_barrier);
 }
@@ -884,8 +970,7 @@ static void freeze_array(struct r1conf *conf, int extra)
 {
        /* stop syncio and normal IO and wait for everything to
         * go quite.
-        * We increment barrier and nr_waiting, and then
-        * wait until nr_pending match nr_queued+extra
+        * We wait until nr_pending match nr_queued+extra
         * This is called in the context of one normal IO request
         * that has failed. Thus any sync request that might be pending
         * will be blocked by nr_pending, and we need to wait for
@@ -895,8 +980,7 @@ static void freeze_array(struct r1conf *conf, int extra)
         * we continue.
         */
        spin_lock_irq(&conf->resync_lock);
-       conf->barrier++;
-       conf->nr_waiting++;
+       conf->array_frozen = 1;
        wait_event_lock_irq_cmd(conf->wait_barrier,
                                conf->nr_pending == conf->nr_queued+extra,
                                conf->resync_lock,
@@ -907,8 +991,7 @@ static void unfreeze_array(struct r1conf *conf)
 {
        /* reverse the effect of the freeze */
        spin_lock_irq(&conf->resync_lock);
-       conf->barrier--;
-       conf->nr_waiting--;
+       conf->array_frozen = 0;
        wake_up(&conf->wait_barrier);
        spin_unlock_irq(&conf->resync_lock);
 }
@@ -1013,6 +1096,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        int first_clone;
        int sectors_handled;
        int max_sectors;
+       sector_t start_next_window;
 
        /*
         * Register the new request and wait if the reconstruction
@@ -1042,7 +1126,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                finish_wait(&conf->wait_barrier, &w);
        }
 
-       wait_barrier(conf);
+       start_next_window = wait_barrier(conf, bio);
 
        bitmap = mddev->bitmap;
 
@@ -1163,6 +1247,7 @@ read_again:
 
        disks = conf->raid_disks * 2;
  retry_write:
+       r1_bio->start_next_window = start_next_window;
        blocked_rdev = NULL;
        rcu_read_lock();
        max_sectors = r1_bio->sectors;
@@ -1231,14 +1316,24 @@ read_again:
        if (unlikely(blocked_rdev)) {
                /* Wait for this device to become unblocked */
                int j;
+               sector_t old = start_next_window;
 
                for (j = 0; j < i; j++)
                        if (r1_bio->bios[j])
                                rdev_dec_pending(conf->mirrors[j].rdev, mddev);
                r1_bio->state = 0;
-               allow_barrier(conf);
+               allow_barrier(conf, start_next_window, bio->bi_sector);
                md_wait_for_blocked_rdev(blocked_rdev, mddev);
-               wait_barrier(conf);
+               start_next_window = wait_barrier(conf, bio);
+               /*
+                * We must make sure the multi r1bios of bio have
+                * the same value of bi_phys_segments
+                */
+               if (bio->bi_phys_segments && old &&
+                   old != start_next_window)
+                       /* Wait for the former r1bio(s) to complete */
+                       wait_event(conf->wait_barrier,
+                                  bio->bi_phys_segments == 1);
                goto retry_write;
        }
 
@@ -1438,11 +1533,14 @@ static void print_conf(struct r1conf *conf)
 
 static void close_sync(struct r1conf *conf)
 {
-       wait_barrier(conf);
-       allow_barrier(conf);
+       wait_barrier(conf, NULL);       /* NULL bio always takes the wait path */
+       allow_barrier(conf, 0, 0);      /* 0 window: no window accounting to undo */
 
        mempool_destroy(conf->r1buf_pool);
        conf->r1buf_pool = NULL;
+
+       conf->next_resync = 0;
+       conf->start_next_window = MaxSector;    /* same value setup_conf() starts with */
 }
 
 static int raid1_spare_active(struct mddev *mddev)
@@ -2714,6 +2812,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
        conf->pending_count = 0;
        conf->recovery_disabled = mddev->recovery_disabled - 1;
 
+       conf->start_next_window = MaxSector;
+       conf->current_window_requests = conf->next_window_requests = 0;
+
        err = -EIO;
        for (i = 0; i < conf->raid_disks * 2; i++) {
 
@@ -2871,8 +2972,8 @@ static int stop(struct mddev *mddev)
                           atomic_read(&bitmap->behind_writes) == 0);
        }
 
-       raise_barrier(conf);
-       lower_barrier(conf);
+       freeze_array(conf, 0);
+       unfreeze_array(conf);
 
        md_unregister_thread(&mddev->thread);
        if (conf->r1bio_pool)
@@ -3031,10 +3132,10 @@ static void raid1_quiesce(struct mddev *mddev, int state)
                wake_up(&conf->wait_barrier);
                break;
        case 1:
-               raise_barrier(conf);
+               freeze_array(conf, 0);
                break;
        case 0:
-               lower_barrier(conf);
+               unfreeze_array(conf);
                break;
        }
 }
@@ -3051,7 +3152,8 @@ static void *raid1_takeover(struct mddev *mddev)
                mddev->new_chunk_sectors = 0;
                conf = setup_conf(mddev);
                if (!IS_ERR(conf))
-                       conf->barrier = 1;
+                       /* Array must appear to be quiesced */
+                       conf->array_frozen = 1;
                return conf;
        }
        return ERR_PTR(-EINVAL);
index 0ff3715fb7eba5ec4fed61a9922276b07b363aff..9bebca7bff2fbc4ec4780031190e6666f7abf56d 100644 (file)
@@ -41,6 +41,19 @@ struct r1conf {
         */
        sector_t                next_resync;
 
+       /* When raid1 starts resync, we divide the array into four partitions:
+        * |---------|--------------|---------------------|-------------|
+        *        next_resync   start_next_window       end_window
+        * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
+        * end_window = start_next_window + NEXT_NORMALIO_DISTANCE
+        * current_window_requests is the count of normal I/O between
+        *   start_next_window and end_window.
+        * next_window_requests is the count of normal I/O after end_window.
+        */
+       sector_t                start_next_window;
+       int                     current_window_requests;
+       int                     next_window_requests;
+
        spinlock_t              device_lock;
 
        /* list of 'struct r1bio' that need to be processed by raid1d,
@@ -65,6 +78,7 @@ struct r1conf {
        int                     nr_waiting;
        int                     nr_queued;
        int                     barrier;
+       int                     array_frozen;
 
        /* Set to 1 if a full sync is needed, (fresh device added).
         * Cleared when a sync completes.
@@ -111,6 +125,7 @@ struct r1bio {
                                                 * in this BehindIO request
                                                 */
        sector_t                sector;
+       sector_t                start_next_window;
        int                     sectors;
        unsigned long           state;
        struct mddev            *mddev;
index 7c3508abb5e178fe310cae9d2c98352efb34f2af..c504e8389e69e3ab9ad717b9f83d0c19c2008d68 100644 (file)
@@ -4384,7 +4384,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
                wait_event(mddev->sb_wait, mddev->flags == 0 ||
-                          kthread_should_stop());
+                          test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+                       allow_barrier(conf);
+                       return sectors_done;
+               }
                conf->reshape_safe = mddev->reshape_position;
                allow_barrier(conf);
        }
index 7f0e17a27aebcd3b448dca34290ab1c735c89eb1..47da0af6322be1bd7930f902c96c57875799a358 100644 (file)
@@ -85,6 +85,42 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
        return &conf->stripe_hashtbl[hash];
 }
 
+static inline int stripe_hash_locks_hash(sector_t sect)
+{      /* NOTE(review): result presumably indexes conf->hash_locks -- confirm */
+       return (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK;
+}
+
+static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
+{      /* lock order: hash lock (with IRQs disabled) first, then device_lock */
+       spin_lock_irq(conf->hash_locks + hash);
+       spin_lock(&conf->device_lock);
+}
+
+static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
+{      /* release in the reverse order of lock_device_hash_lock() */
+       spin_unlock(&conf->device_lock);
+       spin_unlock_irq(conf->hash_locks + hash);
+}
+
+static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
+{      /* IRQs off, then every hash lock in index order, then device_lock */
+       int i;
+       local_irq_disable();
+       spin_lock(conf->hash_locks);
+       for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)      /* lock 0 is already held */
+               spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
+       spin_lock(&conf->device_lock);
+}
+
+static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
+{      /* undo lock_all_device_hash_locks_irq(), releasing in reverse order */
+       int i;
+       spin_unlock(&conf->device_lock);
+       for (i = NR_STRIPE_HASH_LOCKS; i; i--)  /* highest index first */
+               spin_unlock(conf->hash_locks + i - 1);
+       local_irq_enable();
+}
+
 /* bio's attached to a stripe+device for I/O are linked together in bi_sector
  * order without overlap.  There may be several bio's per stripe+device, and
  * a bio could span several devices.
@@ -249,7 +285,8 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
        }
 }
 
-static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
+                             struct list_head *temp_inactive_list)
 {
        BUG_ON(!list_empty(&sh->lru));
        BUG_ON(atomic_read(&conf->active_stripes)==0);
@@ -278,23 +315,68 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
                            < IO_THRESHOLD)
                                md_wakeup_thread(conf->mddev->thread);
                atomic_dec(&conf->active_stripes);
-               if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
-                       list_add_tail(&sh->lru, &conf->inactive_list);
-                       wake_up(&conf->wait_for_stripe);
-                       if (conf->retry_read_aligned)
-                               md_wakeup_thread(conf->mddev->thread);
-               }
+               if (!test_bit(STRIPE_EXPANDING, &sh->state))
+                       list_add_tail(&sh->lru, temp_inactive_list);
        }
 }
 
-static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
+                            struct list_head *temp_inactive_list)
 {
        if (atomic_dec_and_test(&sh->count))
-               do_release_stripe(conf, sh);
+               do_release_stripe(conf, sh, temp_inactive_list);
+}
+
+/*
+ * If @hash is NR_STRIPE_HASH_LOCKS, @temp_inactive_list is an array of lists
+ * (one per hash lock); otherwise it is the single list for lock @hash.
+ * Be careful: Only one task can add/delete stripes from temp_inactive_list at
+ * a given time. Adding stripes only takes device lock, while deleting stripes
+ * only takes hash lock.
+ */
+static void release_inactive_stripe_list(struct r5conf *conf,
+                                        struct list_head *temp_inactive_list,
+                                        int hash)
+{
+       int size;
+       bool do_wakeup = false;
+       unsigned long flags;
+
+       if (hash == NR_STRIPE_HASH_LOCKS) {
+               size = NR_STRIPE_HASH_LOCKS;
+               hash = NR_STRIPE_HASH_LOCKS - 1;        /* walk lists and locks from the last index down */
+       } else
+               size = 1;
+       while (size) {
+               struct list_head *list = &temp_inactive_list[size - 1];
+
+               /*
+                * We don't hold any lock here yet, get_active_stripe() might
+                * remove stripes from the list
+                */
+               if (!list_empty_careful(list)) {
+                       spin_lock_irqsave(conf->hash_locks + hash, flags);
+                       if (list_empty(conf->inactive_list + hash) &&
+                           !list_empty(list))
+                               atomic_dec(&conf->empty_inactive_list_nr);      /* this inactive_list stops being empty */
+                       list_splice_tail_init(list, conf->inactive_list + hash);
+                       do_wakeup = true;
+                       spin_unlock_irqrestore(conf->hash_locks + hash, flags);
+               }
+               size--;
+               hash--;
+       }
+
+       if (do_wakeup) {        /* wake waiters once, after all hash locks are dropped */
+               wake_up(&conf->wait_for_stripe);
+               if (conf->retry_read_aligned)
+                       md_wakeup_thread(conf->mddev->thread);
+       }
+}
 
 /* should hold conf->device_lock already */
-static int release_stripe_list(struct r5conf *conf)
+static int release_stripe_list(struct r5conf *conf,
+                              struct list_head *temp_inactive_list)
 {
        struct stripe_head *sh;
        int count = 0;
@@ -303,6 +385,8 @@ static int release_stripe_list(struct r5conf *conf)
        head = llist_del_all(&conf->released_stripes);
        head = llist_reverse_order(head);
        while (head) {
+               int hash;
+
                sh = llist_entry(head, struct stripe_head, release_list);
                head = llist_next(head);
                /* sh could be readded after STRIPE_ON_RELEASE_LIST is cleard */
@@ -313,7 +397,8 @@ static int release_stripe_list(struct r5conf *conf)
                 * again, the count is always > 1. This is true for
                 * STRIPE_ON_UNPLUG_LIST bit too.
                 */
-               __release_stripe(conf, sh);
+               hash = sh->hash_lock_index;
+               __release_stripe(conf, sh, &temp_inactive_list[hash]);
                count++;
        }
 
@@ -324,9 +409,12 @@ static void release_stripe(struct stripe_head *sh)
 {
        struct r5conf *conf = sh->raid_conf;
        unsigned long flags;
+       struct list_head list;
+       int hash;
        bool wakeup;
 
-       if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
+       if (unlikely(!conf->mddev->thread) ||
+               test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
                goto slow_path;
        wakeup = llist_add(&sh->release_list, &conf->released_stripes);
        if (wakeup)
@@ -336,8 +424,11 @@ slow_path:
        local_irq_save(flags);
        /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
        if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
-               do_release_stripe(conf, sh);
+               INIT_LIST_HEAD(&list);
+               hash = sh->hash_lock_index;
+               do_release_stripe(conf, sh, &list);
                spin_unlock(&conf->device_lock);
+               release_inactive_stripe_list(conf, &list, hash);
        }
        local_irq_restore(flags);
 }
@@ -362,18 +453,21 @@ static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh)
 
 
 /* find an idle stripe, make sure it is unhashed, and return it. */
-static struct stripe_head *get_free_stripe(struct r5conf *conf)
+static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash)
 {
        struct stripe_head *sh = NULL;
        struct list_head *first;
 
-       if (list_empty(&conf->inactive_list))
+       if (list_empty(conf->inactive_list + hash))
                goto out;
-       first = conf->inactive_list.next;
+       first = (conf->inactive_list + hash)->next;
        sh = list_entry(first, struct stripe_head, lru);
        list_del_init(first);
        remove_hash(sh);
        atomic_inc(&conf->active_stripes);
+       BUG_ON(hash != sh->hash_lock_index);
+       if (list_empty(conf->inactive_list + hash))
+               atomic_inc(&conf->empty_inactive_list_nr);
 out:
        return sh;
 }
@@ -416,7 +510,7 @@ static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
 static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
 {
        struct r5conf *conf = sh->raid_conf;
-       int i;
+       int i, seq;
 
        BUG_ON(atomic_read(&sh->count) != 0);
        BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
@@ -426,7 +520,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
                (unsigned long long)sh->sector);
 
        remove_hash(sh);
-
+retry:
+       seq = read_seqcount_begin(&conf->gen_lock);
        sh->generation = conf->generation - previous;
        sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
        sh->sector = sector;
@@ -448,6 +543,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
                dev->flags = 0;
                raid5_build_block(sh, i, previous);
        }
+       if (read_seqcount_retry(&conf->gen_lock, seq))
+               goto retry;
        insert_hash(conf, sh);
        sh->cpu = smp_processor_id();
 }
@@ -552,29 +649,31 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                  int previous, int noblock, int noquiesce)
 {
        struct stripe_head *sh;
+       int hash = stripe_hash_locks_hash(sector);
 
        pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
-       spin_lock_irq(&conf->device_lock);
+       spin_lock_irq(conf->hash_locks + hash);
 
        do {
                wait_event_lock_irq(conf->wait_for_stripe,
                                    conf->quiesce == 0 || noquiesce,
-                                   conf->device_lock);
+                                   *(conf->hash_locks + hash));
                sh = __find_stripe(conf, sector, conf->generation - previous);
                if (!sh) {
                        if (!conf->inactive_blocked)
-                               sh = get_free_stripe(conf);
+                               sh = get_free_stripe(conf, hash);
                        if (noblock && sh == NULL)
                                break;
                        if (!sh) {
                                conf->inactive_blocked = 1;
-                               wait_event_lock_irq(conf->wait_for_stripe,
-                                                   !list_empty(&conf->inactive_list) &&
-                                                   (atomic_read(&conf->active_stripes)
-                                                    < (conf->max_nr_stripes *3/4)
-                                                    || !conf->inactive_blocked),
-                                                   conf->device_lock);
+                               wait_event_lock_irq(
+                                       conf->wait_for_stripe,
+                                       !list_empty(conf->inactive_list + hash) &&
+                                       (atomic_read(&conf->active_stripes)
+                                        < (conf->max_nr_stripes * 3 / 4)
+                                        || !conf->inactive_blocked),
+                                       *(conf->hash_locks + hash));
                                conf->inactive_blocked = 0;
                        } else
                                init_stripe(sh, sector, previous);
@@ -585,9 +684,11 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
                                    && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
                        } else {
+                               spin_lock(&conf->device_lock);
                                if (!test_bit(STRIPE_HANDLE, &sh->state))
                                        atomic_inc(&conf->active_stripes);
                                if (list_empty(&sh->lru) &&
+                                   !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) &&
                                    !test_bit(STRIPE_EXPANDING, &sh->state))
                                        BUG();
                                list_del_init(&sh->lru);
@@ -595,6 +696,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                        sh->group->stripes_cnt--;
                                        sh->group = NULL;
                                }
+                               spin_unlock(&conf->device_lock);
                        }
                }
        } while (sh == NULL);
@@ -602,7 +704,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
        if (sh)
                atomic_inc(&sh->count);
 
-       spin_unlock_irq(&conf->device_lock);
+       spin_unlock_irq(conf->hash_locks + hash);
        return sh;
 }
 
@@ -758,7 +860,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                bi->bi_sector = (sh->sector
                                                 + rdev->data_offset);
                        if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
-                               bi->bi_rw |= REQ_FLUSH;
+                               bi->bi_rw |= REQ_NOMERGE;
 
                        bi->bi_vcnt = 1;
                        bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -1582,7 +1684,7 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
        put_cpu();
 }
 
-static int grow_one_stripe(struct r5conf *conf)
+static int grow_one_stripe(struct r5conf *conf, int hash)
 {
        struct stripe_head *sh;
        sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
@@ -1598,6 +1700,7 @@ static int grow_one_stripe(struct r5conf *conf)
                kmem_cache_free(conf->slab_cache, sh);
                return 0;
        }
+       sh->hash_lock_index = hash;
        /* we just created an active stripe so... */
        atomic_set(&sh->count, 1);
        atomic_inc(&conf->active_stripes);
@@ -1610,6 +1713,7 @@ static int grow_stripes(struct r5conf *conf, int num)
 {
        struct kmem_cache *sc;
        int devs = max(conf->raid_disks, conf->previous_raid_disks);
+       int hash;
 
        if (conf->mddev->gendisk)
                sprintf(conf->cache_name[0],
@@ -1627,9 +1731,13 @@ static int grow_stripes(struct r5conf *conf, int num)
                return 1;
        conf->slab_cache = sc;
        conf->pool_size = devs;
-       while (num--)
-               if (!grow_one_stripe(conf))
+       hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
+       while (num--) {
+               if (!grow_one_stripe(conf, hash))
                        return 1;
+               conf->max_nr_stripes++;
+               hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
+       }
        return 0;
 }
 
@@ -1687,6 +1795,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        int err;
        struct kmem_cache *sc;
        int i;
+       int hash, cnt;
 
        if (newsize <= conf->pool_size)
                return 0; /* never bother to shrink */
@@ -1726,19 +1835,29 @@ static int resize_stripes(struct r5conf *conf, int newsize)
         * OK, we have enough stripes, start collecting inactive
         * stripes and copying them over
         */
+       hash = 0;
+       cnt = 0;
        list_for_each_entry(nsh, &newstripes, lru) {
-               spin_lock_irq(&conf->device_lock);
-               wait_event_lock_irq(conf->wait_for_stripe,
-                                   !list_empty(&conf->inactive_list),
-                                   conf->device_lock);
-               osh = get_free_stripe(conf);
-               spin_unlock_irq(&conf->device_lock);
+               lock_device_hash_lock(conf, hash);
+               wait_event_cmd(conf->wait_for_stripe,
+                                   !list_empty(conf->inactive_list + hash),
+                                   unlock_device_hash_lock(conf, hash),
+                                   lock_device_hash_lock(conf, hash));
+               osh = get_free_stripe(conf, hash);
+               unlock_device_hash_lock(conf, hash);
                atomic_set(&nsh->count, 1);
                for(i=0; i<conf->pool_size; i++)
                        nsh->dev[i].page = osh->dev[i].page;
                for( ; i<newsize; i++)
                        nsh->dev[i].page = NULL;
+               nsh->hash_lock_index = hash;
                kmem_cache_free(conf->slab_cache, osh);
+               cnt++;
+               if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
+                   !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
+                       hash++;
+                       cnt = 0;
+               }
        }
        kmem_cache_destroy(conf->slab_cache);
 
@@ -1797,13 +1916,13 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        return err;
 }
 
-static int drop_one_stripe(struct r5conf *conf)
+static int drop_one_stripe(struct r5conf *conf, int hash)
 {
        struct stripe_head *sh;
 
-       spin_lock_irq(&conf->device_lock);
-       sh = get_free_stripe(conf);
-       spin_unlock_irq(&conf->device_lock);
+       spin_lock_irq(conf->hash_locks + hash);
+       sh = get_free_stripe(conf, hash);
+       spin_unlock_irq(conf->hash_locks + hash);
        if (!sh)
                return 0;
        BUG_ON(atomic_read(&sh->count));
@@ -1815,8 +1934,10 @@ static int drop_one_stripe(struct r5conf *conf)
 
 static void shrink_stripes(struct r5conf *conf)
 {
-       while (drop_one_stripe(conf))
-               ;
+       int hash;
+       for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++)
+               while (drop_one_stripe(conf, hash))
+                       ;
 
        if (conf->slab_cache)
                kmem_cache_destroy(conf->slab_cache);
@@ -1921,6 +2042,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
                               mdname(conf->mddev), bdn);
                else
                        retry = 1;
+               if (set_bad && test_bit(In_sync, &rdev->flags)
+                   && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+                       retry = 1;
                if (retry)
                        if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
                                set_bit(R5_ReadError, &sh->dev[i].flags);
@@ -3900,7 +4024,8 @@ static void raid5_activate_delayed(struct r5conf *conf)
        }
 }
 
-static void activate_bit_delay(struct r5conf *conf)
+static void activate_bit_delay(struct r5conf *conf,
+       struct list_head *temp_inactive_list)
 {
        /* device_lock is held */
        struct list_head head;
@@ -3908,9 +4033,11 @@ static void activate_bit_delay(struct r5conf *conf)
        list_del_init(&conf->bitmap_list);
        while (!list_empty(&head)) {
                struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
+               int hash;
                list_del_init(&sh->lru);
                atomic_inc(&sh->count);
-               __release_stripe(conf, sh);
+               hash = sh->hash_lock_index;
+               __release_stripe(conf, sh, &temp_inactive_list[hash]);
        }
 }
 
@@ -3926,7 +4053,7 @@ int md_raid5_congested(struct mddev *mddev, int bits)
                return 1;
        if (conf->quiesce)
                return 1;
-       if (list_empty_careful(&conf->inactive_list))
+       if (atomic_read(&conf->empty_inactive_list_nr))
                return 1;
 
        return 0;
@@ -4256,6 +4383,7 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 struct raid5_plug_cb {
        struct blk_plug_cb      cb;
        struct list_head        list;
+       struct list_head        temp_inactive_list[NR_STRIPE_HASH_LOCKS];
 };
 
 static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
@@ -4266,6 +4394,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
        struct mddev *mddev = cb->cb.data;
        struct r5conf *conf = mddev->private;
        int cnt = 0;
+       int hash;
 
        if (cb->list.next && !list_empty(&cb->list)) {
                spin_lock_irq(&conf->device_lock);
@@ -4283,11 +4412,14 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
                         * STRIPE_ON_RELEASE_LIST could be set here. In that
                         * case, the count is always > 1 here
                         */
-                       __release_stripe(conf, sh);
+                       hash = sh->hash_lock_index;
+                       __release_stripe(conf, sh, &cb->temp_inactive_list[hash]);
                        cnt++;
                }
                spin_unlock_irq(&conf->device_lock);
        }
+       release_inactive_stripe_list(conf, cb->temp_inactive_list,
+                                    NR_STRIPE_HASH_LOCKS);
        if (mddev->queue)
                trace_block_unplug(mddev->queue, cnt, !from_schedule);
        kfree(cb);
@@ -4308,8 +4440,12 @@ static void release_stripe_plug(struct mddev *mddev,
 
        cb = container_of(blk_cb, struct raid5_plug_cb, cb);
 
-       if (cb->list.next == NULL)
+       if (cb->list.next == NULL) {
+               int i;
                INIT_LIST_HEAD(&cb->list);
+               for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+                       INIT_LIST_HEAD(cb->temp_inactive_list + i);
+       }
 
        if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
                list_add_tail(&sh->lru, &cb->list);
@@ -4692,14 +4828,19 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
            time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
                /* Cannot proceed until we've updated the superblock... */
                wait_event(conf->wait_for_overlap,
-                          atomic_read(&conf->reshape_stripes)==0);
+                          atomic_read(&conf->reshape_stripes)==0
+                          || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (atomic_read(&conf->reshape_stripes) != 0)
+                       return 0;
                mddev->reshape_position = conf->reshape_progress;
                mddev->curr_resync_completed = sector_nr;
                conf->reshape_checkpoint = jiffies;
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
                wait_event(mddev->sb_wait, mddev->flags == 0 ||
-                          kthread_should_stop());
+                          test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       return 0;
                spin_lock_irq(&conf->device_lock);
                conf->reshape_safe = mddev->reshape_position;
                spin_unlock_irq(&conf->device_lock);
@@ -4782,7 +4923,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
            >= mddev->resync_max - mddev->curr_resync_completed) {
                /* Cannot proceed until we've updated the superblock... */
                wait_event(conf->wait_for_overlap,
-                          atomic_read(&conf->reshape_stripes) == 0);
+                          atomic_read(&conf->reshape_stripes) == 0
+                          || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (atomic_read(&conf->reshape_stripes) != 0)
+                       goto ret;
                mddev->reshape_position = conf->reshape_progress;
                mddev->curr_resync_completed = sector_nr;
                conf->reshape_checkpoint = jiffies;
@@ -4790,13 +4934,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
                md_wakeup_thread(mddev->thread);
                wait_event(mddev->sb_wait,
                           !test_bit(MD_CHANGE_DEVS, &mddev->flags)
-                          || kthread_should_stop());
+                          || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       goto ret;
                spin_lock_irq(&conf->device_lock);
                conf->reshape_safe = mddev->reshape_position;
                spin_unlock_irq(&conf->device_lock);
                wake_up(&conf->wait_for_overlap);
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
+ret:
        return reshape_sectors;
 }
 
@@ -4954,27 +5101,45 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 }
 
 static int handle_active_stripes(struct r5conf *conf, int group,
-                                struct r5worker *worker)
+                                struct r5worker *worker,
+                                struct list_head *temp_inactive_list)
 {
        struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
-       int i, batch_size = 0;
+       int i, batch_size = 0, hash;
+       bool release_inactive = false;
 
        while (batch_size < MAX_STRIPE_BATCH &&
                        (sh = __get_priority_stripe(conf, group)) != NULL)
                batch[batch_size++] = sh;
 
-       if (batch_size == 0)
-               return batch_size;
+       if (batch_size == 0) {
+               for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+                       if (!list_empty(temp_inactive_list + i))
+                               break;
+               if (i == NR_STRIPE_HASH_LOCKS)
+                       return batch_size;
+               release_inactive = true;
+       }
        spin_unlock_irq(&conf->device_lock);
 
+       release_inactive_stripe_list(conf, temp_inactive_list,
+                                    NR_STRIPE_HASH_LOCKS);
+
+       if (release_inactive) {
+               spin_lock_irq(&conf->device_lock);
+               return 0;
+       }
+
        for (i = 0; i < batch_size; i++)
                handle_stripe(batch[i]);
 
        cond_resched();
 
        spin_lock_irq(&conf->device_lock);
-       for (i = 0; i < batch_size; i++)
-               __release_stripe(conf, batch[i]);
+       for (i = 0; i < batch_size; i++) {
+               hash = batch[i]->hash_lock_index;
+               __release_stripe(conf, batch[i], &temp_inactive_list[hash]);
+       }
        return batch_size;
 }
 
@@ -4995,9 +5160,10 @@ static void raid5_do_work(struct work_struct *work)
        while (1) {
                int batch_size, released;
 
-               released = release_stripe_list(conf);
+               released = release_stripe_list(conf, worker->temp_inactive_list);
 
-               batch_size = handle_active_stripes(conf, group_id, worker);
+               batch_size = handle_active_stripes(conf, group_id, worker,
+                                                  worker->temp_inactive_list);
                worker->working = false;
                if (!batch_size && !released)
                        break;
@@ -5036,7 +5202,7 @@ static void raid5d(struct md_thread *thread)
                struct bio *bio;
                int batch_size, released;
 
-               released = release_stripe_list(conf);
+               released = release_stripe_list(conf, conf->temp_inactive_list);
 
                if (
                    !list_empty(&conf->bitmap_list)) {
@@ -5046,7 +5212,7 @@ static void raid5d(struct md_thread *thread)
                        bitmap_unplug(mddev->bitmap);
                        spin_lock_irq(&conf->device_lock);
                        conf->seq_write = conf->seq_flush;
-                       activate_bit_delay(conf);
+                       activate_bit_delay(conf, conf->temp_inactive_list);
                }
                raid5_activate_delayed(conf);
 
@@ -5060,7 +5226,8 @@ static void raid5d(struct md_thread *thread)
                        handled++;
                }
 
-               batch_size = handle_active_stripes(conf, ANY_GROUP, NULL);
+               batch_size = handle_active_stripes(conf, ANY_GROUP, NULL,
+                                                  conf->temp_inactive_list);
                if (!batch_size && !released)
                        break;
                handled += batch_size;
@@ -5096,22 +5263,29 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 {
        struct r5conf *conf = mddev->private;
        int err;
+       int hash;
 
        if (size <= 16 || size > 32768)
                return -EINVAL;
+       hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
        while (size < conf->max_nr_stripes) {
-               if (drop_one_stripe(conf))
+               if (drop_one_stripe(conf, hash))
                        conf->max_nr_stripes--;
                else
                        break;
+               hash--;
+               if (hash < 0)
+                       hash = NR_STRIPE_HASH_LOCKS - 1;
        }
        err = md_allow_write(mddev);
        if (err)
                return err;
+       hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
        while (size > conf->max_nr_stripes) {
-               if (grow_one_stripe(conf))
+               if (grow_one_stripe(conf, hash))
                        conf->max_nr_stripes++;
                else break;
+               hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
        }
        return 0;
 }
@@ -5199,15 +5373,18 @@ raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
                return 0;
 }
 
-static int alloc_thread_groups(struct r5conf *conf, int cnt);
+static int alloc_thread_groups(struct r5conf *conf, int cnt,
+                              int *group_cnt,
+                              int *worker_cnt_per_group,
+                              struct r5worker_group **worker_groups);
 static ssize_t
 raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 {
        struct r5conf *conf = mddev->private;
        unsigned long new;
        int err;
-       struct r5worker_group *old_groups;
-       int old_group_cnt;
+       struct r5worker_group *new_groups, *old_groups;
+       int group_cnt, worker_cnt_per_group;
 
        if (len >= PAGE_SIZE)
                return -EINVAL;
@@ -5223,14 +5400,19 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
        mddev_suspend(mddev);
 
        old_groups = conf->worker_groups;
-       old_group_cnt = conf->worker_cnt_per_group;
+       if (old_groups)
+               flush_workqueue(raid5_wq);
+
+       err = alloc_thread_groups(conf, new,
+                                 &group_cnt, &worker_cnt_per_group,
+                                 &new_groups);
+       if (!err) {
+               spin_lock_irq(&conf->device_lock);
+               conf->group_cnt = group_cnt;
+               conf->worker_cnt_per_group = worker_cnt_per_group;
+               conf->worker_groups = new_groups;
+               spin_unlock_irq(&conf->device_lock);
 
-       conf->worker_groups = NULL;
-       err = alloc_thread_groups(conf, new);
-       if (err) {
-               conf->worker_groups = old_groups;
-               conf->worker_cnt_per_group = old_group_cnt;
-       } else {
                if (old_groups)
                        kfree(old_groups[0].workers);
                kfree(old_groups);
@@ -5260,40 +5442,47 @@ static struct attribute_group raid5_attrs_group = {
        .attrs = raid5_attrs,
 };
 
-static int alloc_thread_groups(struct r5conf *conf, int cnt)
+static int alloc_thread_groups(struct r5conf *conf, int cnt,
+                              int *group_cnt,
+                              int *worker_cnt_per_group,
+                              struct r5worker_group **worker_groups)
 {
-       int i, j;
+       int i, j, k;
        ssize_t size;
        struct r5worker *workers;
 
-       conf->worker_cnt_per_group = cnt;
+       *worker_cnt_per_group = cnt;
        if (cnt == 0) {
-               conf->worker_groups = NULL;
+               *group_cnt = 0;
+               *worker_groups = NULL;
                return 0;
        }
-       conf->group_cnt = num_possible_nodes();
+       *group_cnt = num_possible_nodes();
        size = sizeof(struct r5worker) * cnt;
-       workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
-       conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
-                               conf->group_cnt, GFP_NOIO);
-       if (!conf->worker_groups || !workers) {
+       workers = kzalloc(size * *group_cnt, GFP_NOIO);
+       *worker_groups = kzalloc(sizeof(struct r5worker_group) *
+                               *group_cnt, GFP_NOIO);
+       if (!*worker_groups || !workers) {
                kfree(workers);
-               kfree(conf->worker_groups);
-               conf->worker_groups = NULL;
+               kfree(*worker_groups);
                return -ENOMEM;
        }
 
-       for (i = 0; i < conf->group_cnt; i++) {
+       for (i = 0; i < *group_cnt; i++) {
                struct r5worker_group *group;
 
-               group = &conf->worker_groups[i];
+               group = &(*worker_groups)[i];
                INIT_LIST_HEAD(&group->handle_list);
                group->conf = conf;
                group->workers = workers + i * cnt;
 
                for (j = 0; j < cnt; j++) {
-                       group->workers[j].group = group;
-                       INIT_WORK(&group->workers[j].work, raid5_do_work);
+                       struct r5worker *worker = group->workers + j;
+                       worker->group = group;
+                       INIT_WORK(&worker->work, raid5_do_work);
+
+                       for (k = 0; k < NR_STRIPE_HASH_LOCKS; k++)
+                               INIT_LIST_HEAD(worker->temp_inactive_list + k);
                }
        }
 
@@ -5444,6 +5633,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        struct md_rdev *rdev;
        struct disk_info *disk;
        char pers_name[6];
+       int i;
+       int group_cnt, worker_cnt_per_group;
+       struct r5worker_group *new_group;
 
        if (mddev->new_level != 5
            && mddev->new_level != 4
@@ -5478,7 +5670,12 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        if (conf == NULL)
                goto abort;
        /* Don't enable multi-threading by default*/
-       if (alloc_thread_groups(conf, 0))
+       if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,
+                                &new_group)) {
+               conf->group_cnt = group_cnt;
+               conf->worker_cnt_per_group = worker_cnt_per_group;
+               conf->worker_groups = new_group;
+       } else
                goto abort;
        spin_lock_init(&conf->device_lock);
        seqcount_init(&conf->gen_lock);
@@ -5488,7 +5685,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        INIT_LIST_HEAD(&conf->hold_list);
        INIT_LIST_HEAD(&conf->delayed_list);
        INIT_LIST_HEAD(&conf->bitmap_list);
-       INIT_LIST_HEAD(&conf->inactive_list);
        init_llist_head(&conf->released_stripes);
        atomic_set(&conf->active_stripes, 0);
        atomic_set(&conf->preread_active_stripes, 0);
@@ -5514,6 +5710,21 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
                goto abort;
 
+       /* We init hash_locks[0] separately so that it can be used
+        * as the reference lock in the spin_lock_nest_lock() call
+        * in lock_all_device_hash_locks_irq in order to convince
+        * lockdep that we know what we are doing.
+        */
+       spin_lock_init(conf->hash_locks);
+       for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
+               spin_lock_init(conf->hash_locks + i);
+
+       for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+               INIT_LIST_HEAD(conf->inactive_list + i);
+
+       for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+               INIT_LIST_HEAD(conf->temp_inactive_list + i);
+
        conf->level = mddev->new_level;
        if (raid5_alloc_percpu(conf) != 0)
                goto abort;
@@ -5554,7 +5765,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        else
                conf->max_degraded = 1;
        conf->algorithm = mddev->new_layout;
-       conf->max_nr_stripes = NR_STRIPES;
        conf->reshape_progress = mddev->reshape_position;
        if (conf->reshape_progress != MaxSector) {
                conf->prev_chunk_sectors = mddev->chunk_sectors;
@@ -5563,7 +5773,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 
        memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
                 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
-       if (grow_stripes(conf, conf->max_nr_stripes)) {
+       atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
+       if (grow_stripes(conf, NR_STRIPES)) {
                printk(KERN_ERR
                       "md/raid:%s: couldn't allocate %dkB for buffers\n",
                       mdname(mddev), memory);
@@ -6369,12 +6580,18 @@ static int raid5_start_reshape(struct mddev *mddev)
        if (!mddev->sync_thread) {
                mddev->recovery = 0;
                spin_lock_irq(&conf->device_lock);
+               write_seqcount_begin(&conf->gen_lock);
                mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+               mddev->new_chunk_sectors =
+                       conf->chunk_sectors = conf->prev_chunk_sectors;
+               mddev->new_layout = conf->algorithm = conf->prev_algo;
                rdev_for_each(rdev, mddev)
                        rdev->new_data_offset = rdev->data_offset;
                smp_wmb();
+               conf->generation --;
                conf->reshape_progress = MaxSector;
                mddev->reshape_position = MaxSector;
+               write_seqcount_end(&conf->gen_lock);
                spin_unlock_irq(&conf->device_lock);
                return -EAGAIN;
        }
@@ -6462,27 +6679,28 @@ static void raid5_quiesce(struct mddev *mddev, int state)
                break;
 
        case 1: /* stop all writes */
-               spin_lock_irq(&conf->device_lock);
+               lock_all_device_hash_locks_irq(conf);
                /* '2' tells resync/reshape to pause so that all
                 * active stripes can drain
                 */
                conf->quiesce = 2;
-               wait_event_lock_irq(conf->wait_for_stripe,
+               wait_event_cmd(conf->wait_for_stripe,
                                    atomic_read(&conf->active_stripes) == 0 &&
                                    atomic_read(&conf->active_aligned_reads) == 0,
-                                   conf->device_lock);
+                                   unlock_all_device_hash_locks_irq(conf),
+                                   lock_all_device_hash_locks_irq(conf));
                conf->quiesce = 1;
-               spin_unlock_irq(&conf->device_lock);
+               unlock_all_device_hash_locks_irq(conf);
                /* allow reshape to continue */
                wake_up(&conf->wait_for_overlap);
                break;
 
        case 0: /* re-enable writes */
-               spin_lock_irq(&conf->device_lock);
+               lock_all_device_hash_locks_irq(conf);
                conf->quiesce = 0;
                wake_up(&conf->wait_for_stripe);
                wake_up(&conf->wait_for_overlap);
-               spin_unlock_irq(&conf->device_lock);
+               unlock_all_device_hash_locks_irq(conf);
                break;
        }
 }
index b42e6b462edad8967e68bfa263c43edd1787953d..01ad8ae8f57830a04de4e1e30b731f74660bda62 100644 (file)
@@ -205,6 +205,7 @@ struct stripe_head {
        short                   pd_idx;         /* parity disk index */
        short                   qd_idx;         /* 'Q' disk index for raid6 */
        short                   ddf_layout;/* use DDF ordering to calculate Q */
+       short                   hash_lock_index;
        unsigned long           state;          /* state flags */
        atomic_t                count;        /* nr of active thread/requests */
        int                     bm_seq; /* sequence number for bitmap flushes */
@@ -367,9 +368,18 @@ struct disk_info {
        struct md_rdev  *rdev, *replacement;
 };
 
+/* NOTE NR_STRIPE_HASH_LOCKS must remain below 64.
+ * This is because we sometimes take all the spinlocks
+ * and creating that much locking depth can cause
+ * problems.
+ */
+#define NR_STRIPE_HASH_LOCKS 8
+#define STRIPE_HASH_LOCKS_MASK (NR_STRIPE_HASH_LOCKS - 1)
+
 struct r5worker {
        struct work_struct work;
        struct r5worker_group *group;
+       struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS];
        bool working;
 };
 
@@ -382,6 +392,8 @@ struct r5worker_group {
 
 struct r5conf {
        struct hlist_head       *stripe_hashtbl;
+       /* only protect corresponding hash list and inactive_list */
+       spinlock_t              hash_locks[NR_STRIPE_HASH_LOCKS];
        struct mddev            *mddev;
        int                     chunk_sectors;
        int                     level, algorithm;
@@ -462,7 +474,8 @@ struct r5conf {
         * Free stripes pool
         */
        atomic_t                active_stripes;
-       struct list_head        inactive_list;
+       struct list_head        inactive_list[NR_STRIPE_HASH_LOCKS];
+       atomic_t                empty_inactive_list_nr;
        struct llist_head       released_stripes;
        wait_queue_head_t       wait_for_stripe;
        wait_queue_head_t       wait_for_overlap;
@@ -477,6 +490,7 @@ struct r5conf {
         * the new thread here until we fully activate the array.
         */
        struct md_thread        *thread;
+       struct list_head        temp_inactive_list[NR_STRIPE_HASH_LOCKS];
        struct r5worker_group   *worker_groups;
        int                     group_cnt;
        int                     worker_cnt_per_group;
index 36513e896413c01a3b209e153e75c680aac36bc6..65cab70fefcb067e3de0a404d47bd41feaafd38b 100644 (file)
@@ -341,8 +341,7 @@ static void deinterlace_issue_dma(struct deinterlace_ctx *ctx, int op,
        ctx->xt->dir = DMA_MEM_TO_MEM;
        ctx->xt->src_sgl = false;
        ctx->xt->dst_sgl = true;
-       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT |
-               DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SKIP_SRC_UNMAP;
+       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
        tx = dmadev->device_prep_interleaved_dma(chan, ctx->xt, flags);
        if (tx == NULL) {
index 6a74ce040d288eb06cbd6684727358277f89481b..ccdadd623a3aae3ffabd49be553709d5a9511ec6 100644 (file)
@@ -565,7 +565,7 @@ static void buffer_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb)
 
        desc = dmaengine_prep_slave_sg(fh->chan,
                buf->sg, sg_elems, DMA_DEV_TO_MEM,
-               DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+               DMA_PREP_INTERRUPT);
        if (!desc) {
                spin_lock_irq(&fh->queue_lock);
                list_del_init(&vb->queue);
index 08b18f3f5264aa8052cea030143d5bc5c4b76cd0..9e2b985293fc08cf30bef9a9455fcade1eb095cb 100644 (file)
@@ -633,8 +633,7 @@ static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf)
        struct dma_async_tx_descriptor *tx;
        dma_cookie_t cookie;
        dma_addr_t dst, src;
-       unsigned long dma_flags = DMA_COMPL_SKIP_DEST_UNMAP |
-                                 DMA_COMPL_SKIP_SRC_UNMAP;
+       unsigned long dma_flags = 0;
 
        dst_sg = buf->vb.sglist;
        dst_nents = buf->vb.sglen;
index ef8956568c3a2978b90cab02a9dc0ed56c76254f..157b570ba343e4648b796e7330e7b75bc052d00a 100644 (file)
@@ -308,8 +308,7 @@ static void sdio_acpi_set_handle(struct sdio_func *func)
        struct mmc_host *host = func->card->host;
        u64 addr = (host->slotno << 16) | func->num;
 
-       ACPI_HANDLE_SET(&func->dev,
-                       acpi_get_child(ACPI_HANDLE(host->parent), addr));
+       acpi_preset_companion(&func->dev, ACPI_HANDLE(host->parent), addr);
 }
 #else
 static inline void sdio_acpi_set_handle(struct sdio_func *func) {}
index d78a97d4153a98234998080b40c4cb94f50a0876..59f08c44abdbc9be920ea62974d19bcdc7884889 100644 (file)
@@ -375,8 +375,7 @@ static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
 
        dma_dev = host->dma_chan->device;
 
-       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-               DMA_COMPL_SKIP_DEST_UNMAP;
+       flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
        phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
        if (dma_mapping_error(dma_dev->dev, phys_addr)) {
index 3dc1a7564d8725d62085b16cb7c0544e138858b2..8b2752263db9a5549742bb36c3dcee48999b8b62 100644 (file)
@@ -573,8 +573,6 @@ static int dma_xfer(struct fsmc_nand_data *host, void *buffer, int len,
        dma_dev = chan->device;
        dma_addr = dma_map_single(dma_dev->dev, buffer, len, direction);
 
-       flags |= DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
-
        if (direction == DMA_TO_DEVICE) {
                dma_src = dma_addr;
                dma_dst = host->data_pa;
index 0951f7aca1eff6671f187d03b203952926a1ecea..822616e3c3754118ab2e09eada44d59a24c3954c 100644 (file)
@@ -459,8 +459,7 @@ static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev)
                sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
 
        ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-               &ctl->sg, 1, DMA_MEM_TO_DEV,
-               DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+               &ctl->sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
        if (!ctl->adesc)
                return NETDEV_TX_BUSY;
 
@@ -571,8 +570,7 @@ static int __ks8842_start_new_rx_dma(struct net_device *netdev)
                sg_dma_len(sg) = DMA_BUFFER_SIZE;
 
                ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-                       sg, 1, DMA_DEV_TO_MEM,
-                       DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+                       sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
 
                if (!ctl->adesc)
                        goto out;
index 12a9e83c008b402f0f7acde4e80c742c7795348e..d0222f13d154808f3cfa4ed3cab26cd1a7b899ee 100644 (file)
@@ -1034,10 +1034,9 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
        struct dma_chan *chan = qp->dma_chan;
        struct dma_device *device;
        size_t pay_off, buff_off;
-       dma_addr_t src, dest;
+       struct dmaengine_unmap_data *unmap;
        dma_cookie_t cookie;
        void *buf = entry->buf;
-       unsigned long flags;
 
        entry->len = len;
 
@@ -1045,35 +1044,49 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
                goto err;
 
        if (len < copy_bytes) 
-               goto err1;
+               goto err_wait;
 
        device = chan->device;
        pay_off = (size_t) offset & ~PAGE_MASK;
        buff_off = (size_t) buf & ~PAGE_MASK;
 
        if (!is_dma_copy_aligned(device, pay_off, buff_off, len))
-               goto err1;
+               goto err_wait;
 
-       dest = dma_map_single(device->dev, buf, len, DMA_FROM_DEVICE);
-       if (dma_mapping_error(device->dev, dest))
-               goto err1;
+       unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
+       if (!unmap)
+               goto err_wait;
 
-       src = dma_map_single(device->dev, offset, len, DMA_TO_DEVICE);
-       if (dma_mapping_error(device->dev, src))
-               goto err2;
+       unmap->len = len;
+       unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset),
+                                     pay_off, len, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[0]))
+               goto err_get_unmap;
+
+       unmap->to_cnt = 1;
 
-       flags = DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SRC_UNMAP_SINGLE |
-               DMA_PREP_INTERRUPT;
-       txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+       unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf),
+                                     buff_off, len, DMA_FROM_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[1]))
+               goto err_get_unmap;
+
+       unmap->from_cnt = 1;
+
+       txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+                                            unmap->addr[0], len,
+                                            DMA_PREP_INTERRUPT);
        if (!txd)
-               goto err3;
+               goto err_get_unmap;
 
        txd->callback = ntb_rx_copy_callback;
        txd->callback_param = entry;
+       dma_set_unmap(txd, unmap);
 
        cookie = dmaengine_submit(txd);
        if (dma_submit_error(cookie))
-               goto err3;
+               goto err_set_unmap;
+
+       dmaengine_unmap_put(unmap);
 
        qp->last_cookie = cookie;
 
@@ -1081,11 +1094,11 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset,
 
        return;
 
-err3:
-       dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
-err2:
-       dma_unmap_single(device->dev, dest, len, DMA_FROM_DEVICE);
-err1:
+err_set_unmap:
+       dmaengine_unmap_put(unmap);
+err_get_unmap:
+       dmaengine_unmap_put(unmap);
+err_wait:
        /* If the callbacks come out of order, the writing of the index to the
         * last completed will be out of order.  This may result in the
         * receive stalling forever.
@@ -1245,12 +1258,12 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
        struct dma_chan *chan = qp->dma_chan;
        struct dma_device *device;
        size_t dest_off, buff_off;
-       dma_addr_t src, dest;
+       struct dmaengine_unmap_data *unmap;
+       dma_addr_t dest;
        dma_cookie_t cookie;
        void __iomem *offset;
        size_t len = entry->len;
        void *buf = entry->buf;
-       unsigned long flags;
 
        offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
        hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1273,28 +1286,41 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
        if (!is_dma_copy_aligned(device, buff_off, dest_off, len))
                goto err;
 
-       src = dma_map_single(device->dev, buf, len, DMA_TO_DEVICE);
-       if (dma_mapping_error(device->dev, src))
+       unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+       if (!unmap)
                goto err;
 
-       flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_PREP_INTERRUPT;
-       txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+       unmap->len = len;
+       unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf),
+                                     buff_off, len, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[0]))
+               goto err_get_unmap;
+
+       unmap->to_cnt = 1;
+
+       txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+                                            DMA_PREP_INTERRUPT);
        if (!txd)
-               goto err1;
+               goto err_get_unmap;
 
        txd->callback = ntb_tx_copy_callback;
        txd->callback_param = entry;
+       dma_set_unmap(txd, unmap);
 
        cookie = dmaengine_submit(txd);
        if (dma_submit_error(cookie))
-               goto err1;
+               goto err_set_unmap;
+
+       dmaengine_unmap_put(unmap);
 
        dma_async_issue_pending(chan);
        qp->tx_async++;
 
        return;
-err1:
-       dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
+err_set_unmap:
+       dmaengine_unmap_put(unmap);
+err_get_unmap:
+       dmaengine_unmap_put(unmap);
 err:
        ntb_memcpy_tx(entry, offset);
        qp->tx_memcpy++;
index 1ce8ee054f1aa89ba83fb1001761e9200262a666..a94d850ae228c377387d036fd0f3af7e656130c6 100644 (file)
@@ -367,7 +367,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags)
                string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
        }
 
-       handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       handle = ACPI_HANDLE(&pdev->dev);
        if (!handle) {
                /*
                 * This hotplug controller was not listed in the ACPI name
index 26100f510b1087f45bbe39b79dcf9649acf38e9d..1592dbe4f90461dbfa82be205f334eb0e8ecc6f6 100644 (file)
@@ -176,7 +176,6 @@ u8 acpiphp_get_latch_status(struct acpiphp_slot *slot);
 u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot);
 
 /* variables */
-extern bool acpiphp_debug;
 extern bool acpiphp_disabled;
 
 #endif /* _ACPIPHP_H */
index ead7c534095e885572c85c6963d57742dcab7bec..cff7cadfc2e4b27344d79ed479f419a7a012f5e0 100644 (file)
@@ -54,7 +54,7 @@ int pciehp_acpi_slot_detection_check(struct pci_dev *dev)
 {
        if (slot_detection_mode != PCIEHP_DETECT_ACPI)
                return 0;
-       if (acpi_pci_detect_ejectable(DEVICE_ACPI_HANDLE(&dev->dev)))
+       if (acpi_pci_detect_ejectable(ACPI_HANDLE(&dev->dev)))
                return 0;
        return -ENODEV;
 }
@@ -96,7 +96,7 @@ static int __init dummy_probe(struct pcie_device *dev)
                        dup_slot_id++;
        }
        list_add_tail(&slot->list, &dummy_slots);
-       handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+       handle = ACPI_HANDLE(&pdev->dev);
        if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
                acpi_slot_detected = 1;
        return -ENODEV;         /* dummy driver always returns error */
index b2781dfe60e9e3cbc79095de46b96f8421958275..5b05a68cca6c73aaf50c7f66ccec5923e4f3d431 100644 (file)
@@ -9,6 +9,7 @@
  * Work to add BIOS PROM support was completed by Mike Habeck.
  */
 
+#include <linux/acpi.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -29,7 +30,6 @@
 #include <asm/sn/sn_feature_sets.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/types.h>
-#include <linux/acpi.h>
 #include <asm/sn/acpi.h>
 
 #include "../pci.h"
@@ -414,7 +414,7 @@ static int enable_slot(struct hotplug_slot *bss_hotplug_slot)
                acpi_handle rethandle;
                acpi_status ret;
 
-               phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+               phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
 
                if (acpi_bus_get_device(phandle, &pdevice)) {
                        dev_dbg(&slot->pci_bus->self->dev,
@@ -495,7 +495,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
 
        /* free the ACPI resources for the slot */
        if (SN_ACPI_BASE_SUPPORT() &&
-            PCI_CONTROLLER(slot->pci_bus)->acpi_handle) {
+            PCI_CONTROLLER(slot->pci_bus)->companion) {
                unsigned long long adr;
                struct acpi_device *device;
                acpi_handle phandle;
@@ -504,7 +504,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
                acpi_status ret;
 
                /* Get the rootbus node pointer */
-               phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+               phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
 
                acpi_scan_lock_acquire();
                /*
index 1b90579b233ae8c2d7db5efe00cc022c42fbb73a..50ce6809829836c38d4aedfa58aeb642b126d9df 100644 (file)
@@ -37,7 +37,7 @@ static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
        char *type;
        struct resource *res;
 
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
        if (!handle)
                return -EINVAL;
 
index dfd1f59de729c6293416d789fb6f665dc09bf701..f166126e28d17ad4b43846beb361d0a5c10fa78e 100644 (file)
@@ -173,14 +173,14 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev)
 
 static bool acpi_pci_power_manageable(struct pci_dev *dev)
 {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       acpi_handle handle = ACPI_HANDLE(&dev->dev);
 
        return handle ? acpi_bus_power_manageable(handle) : false;
 }
 
 static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
 {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       acpi_handle handle = ACPI_HANDLE(&dev->dev);
        static const u8 state_conv[] = {
                [PCI_D0] = ACPI_STATE_D0,
                [PCI_D1] = ACPI_STATE_D1,
@@ -217,7 +217,7 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
 
 static bool acpi_pci_can_wakeup(struct pci_dev *dev)
 {
-       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       acpi_handle handle = ACPI_HANDLE(&dev->dev);
 
        return handle ? acpi_bus_can_wakeup(handle) : false;
 }
index edaed6f4da6cebfbe1c0e2b1b40ac94a039af9dc..d51f45aa669e5ff9184daab1df72776e8165aa84 100644 (file)
@@ -263,7 +263,7 @@ device_has_dsm(struct device *dev)
        acpi_handle handle;
        struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
 
-       handle = DEVICE_ACPI_HANDLE(dev);
+       handle = ACPI_HANDLE(dev);
 
        if (!handle)
                return FALSE;
@@ -295,7 +295,7 @@ acpilabel_show(struct device *dev, struct device_attribute *attr, char *buf)
        acpi_handle handle;
        int length;
 
-       handle = DEVICE_ACPI_HANDLE(dev);
+       handle = ACPI_HANDLE(dev);
 
        if (!handle)
                return -1;
@@ -316,7 +316,7 @@ acpiindex_show(struct device *dev, struct device_attribute *attr, char *buf)
        acpi_handle handle;
        int length;
 
-       handle = DEVICE_ACPI_HANDLE(dev);
+       handle = ACPI_HANDLE(dev);
 
        if (!handle)
                return -1;
index 605a9be5512907238e520d080c55e4637afcbea0..b9429fbf1cd82a403c65bddd09acc6e11078b66a 100644 (file)
@@ -519,7 +519,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 
        gmux_data->power_state = VGA_SWITCHEROO_ON;
 
-       gmux_data->dhandle = DEVICE_ACPI_HANDLE(&pnp->dev);
+       gmux_data->dhandle = ACPI_HANDLE(&pnp->dev);
        if (!gmux_data->dhandle) {
                pr_err("Cannot find acpi handle for pnp device %s\n",
                       dev_name(&pnp->dev));
index 747826d99059955f8941d592ef2f7734cd38fd08..14655a0f0431b35bd9a0988b669c510a162155e3 100644 (file)
@@ -89,7 +89,7 @@ static int pnpacpi_set_resources(struct pnp_dev *dev)
 
        pnp_dbg(&dev->dev, "set resources\n");
 
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
        if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
                dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
                return -ENODEV;
@@ -122,7 +122,7 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev)
 
        dev_dbg(&dev->dev, "disable resources\n");
 
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
        if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
                dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
                return 0;
@@ -144,7 +144,7 @@ static bool pnpacpi_can_wakeup(struct pnp_dev *dev)
        struct acpi_device *acpi_dev;
        acpi_handle handle;
 
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
        if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
                dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
                return false;
@@ -159,7 +159,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
        acpi_handle handle;
        int error = 0;
 
-       handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       handle = ACPI_HANDLE(&dev->dev);
        if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
                dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
                return 0;
@@ -194,7 +194,7 @@ static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
 static int pnpacpi_resume(struct pnp_dev *dev)
 {
        struct acpi_device *acpi_dev;
-       acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+       acpi_handle handle = ACPI_HANDLE(&dev->dev);
        int error = 0;
 
        if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
index 15f166a470a7f3fe5eb0e45dd2be80c2a384a76d..0077302221164e62df209626e6595a5a9505e4ee 100644 (file)
@@ -626,7 +626,7 @@ comment "Platform RTC drivers"
 
 config RTC_DRV_CMOS
        tristate "PC-style 'CMOS'"
-       depends on X86 || ALPHA || ARM || M32R || ATARI || PPC || MIPS || SPARC64
+       depends on X86 || ARM || M32R || ATARI || PPC || MIPS || SPARC64
        default y if X86
        help
          Say "yes" here to get direct support for the real time clock
@@ -643,6 +643,14 @@ config RTC_DRV_CMOS
          This driver can also be built as a module. If so, the module
          will be called rtc-cmos.
 
+config RTC_DRV_ALPHA
+       bool "Alpha PC-style CMOS"
+       depends on ALPHA
+       default y
+       help
+         Direct support for the real-time clock found on every Alpha
+         system, specifically MC146818 compatibles.  If in doubt, say Y.
+
 config RTC_DRV_VRTC
        tristate "Virtual RTC for Intel MID platforms"
        depends on X86_INTEL_MID
index 8b2cd8a5a2ffe3d5928d2bc7f026f3fcceaa1dca..c0da95e95702123d403bf58ed4b894a104eeb2ec 100644 (file)
@@ -428,6 +428,14 @@ static int __exit at91_rtc_remove(struct platform_device *pdev)
        return 0;
 }
 
+static void at91_rtc_shutdown(struct platform_device *pdev)
+{
+       /* Disable all interrupts */
+       at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM |
+                                       AT91_RTC_SECEV | AT91_RTC_TIMEV |
+                                       AT91_RTC_CALEV);
+}
+
 #ifdef CONFIG_PM_SLEEP
 
 /* AT91RM9200 RTC Power management control */
@@ -466,6 +474,7 @@ static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume);
 
 static struct platform_driver at91_rtc_driver = {
        .remove         = __exit_p(at91_rtc_remove),
+       .shutdown       = at91_rtc_shutdown,
        .driver         = {
                .name   = "at91_rtc",
                .owner  = THIS_MODULE,
index b9f0192758d6d929aab86d087c443adc46154e66..6d207afec8cbdb578c9e5428d6018dff1d93690b 100644 (file)
@@ -150,7 +150,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
                                &dws->tx_sgl,
                                1,
                                DMA_MEM_TO_DEV,
-                               DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+                               DMA_PREP_INTERRUPT);
        txdesc->callback = dw_spi_dma_done;
        txdesc->callback_param = dws;
 
@@ -173,7 +173,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
                                &dws->rx_sgl,
                                1,
                                DMA_DEV_TO_MEM,
-                               DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+                               DMA_PREP_INTERRUPT);
        rxdesc->callback = dw_spi_dma_done;
        rxdesc->callback_param = dws;
 
index 8d85ddc4601173c14fd24ae6a5f7defe60b94102..18cc625d887f796aed4b082eb2366262aec8aaa8 100644 (file)
@@ -357,6 +357,19 @@ struct spi_device *spi_alloc_device(struct spi_master *master)
 }
 EXPORT_SYMBOL_GPL(spi_alloc_device);
 
+static void spi_dev_set_name(struct spi_device *spi)
+{
+       struct acpi_device *adev = ACPI_COMPANION(&spi->dev);
+
+       if (adev) {
+               dev_set_name(&spi->dev, "spi-%s", acpi_dev_name(adev));
+               return;
+       }
+
+       dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->master->dev),
+                    spi->chip_select);
+}
+
 /**
  * spi_add_device - Add spi_device allocated with spi_alloc_device
  * @spi: spi_device to register
@@ -383,9 +396,7 @@ int spi_add_device(struct spi_device *spi)
        }
 
        /* Set the bus ID string */
-       dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->master->dev),
-                       spi->chip_select);
-
+       spi_dev_set_name(spi);
 
        /* We need to make sure there's no other device with this
         * chipselect **BEFORE** we call setup(), else we'll trash
@@ -1144,7 +1155,7 @@ static acpi_status acpi_spi_add_device(acpi_handle handle, u32 level,
                return AE_NO_MEMORY;
        }
 
-       ACPI_HANDLE_SET(&spi->dev, handle);
+       ACPI_COMPANION_SET(&spi->dev, adev);
        spi->irq = -1;
 
        INIT_LIST_HEAD(&resource_list);
index 537750261aaa2fe5a5e2de8fdb3e6ee0adf8fe4d..7d8103cd3e2ec56eacbb5a5d3f3f332597e190ff 100644 (file)
@@ -1433,7 +1433,7 @@ static void work_fn_rx(struct work_struct *work)
        desc = s->desc_rx[new];
 
        if (dma_async_is_tx_complete(s->chan_rx, s->active_rx, NULL, NULL) !=
-           DMA_SUCCESS) {
+           DMA_COMPLETE) {
                /* Handle incomplete DMA receive */
                struct dma_chan *chan = s->chan_rx;
                struct shdma_desc *sh_desc = container_of(desc,
index 06cec635e703adc3f9859ae8ca53dbb124820cd4..a7c04e24ca484deb233db5dcfd995b73427c4cc0 100644 (file)
@@ -5501,6 +5501,6 @@ acpi_handle usb_get_hub_port_acpi_handle(struct usb_device *hdev,
        if (!hub)
                return NULL;
 
-       return DEVICE_ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
+       return ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
 }
 #endif
index 255c14464bf2ea7a30dffab6fa1770c7b030c0aa..4e243c37f17f50ab197582a3ee5d4662e933d368 100644 (file)
@@ -173,7 +173,7 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle)
                }
 
                /* root hub's parent is the usb hcd. */
-               parent_handle = DEVICE_ACPI_HANDLE(dev->parent);
+               parent_handle = ACPI_HANDLE(dev->parent);
                *handle = acpi_get_child(parent_handle, udev->portnum);
                if (!*handle)
                        return -ENODEV;
@@ -194,7 +194,7 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle)
 
                        raw_port_num = usb_hcd_find_raw_port_number(hcd,
                                port_num);
-                       *handle = acpi_get_child(DEVICE_ACPI_HANDLE(&udev->dev),
+                       *handle = acpi_get_child(ACPI_HANDLE(&udev->dev),
                                raw_port_num);
                        if (!*handle)
                                return -ENODEV;
index d15f6e80479f5388f52af9eba6effb9e6e769a11..188825122aae8a037bccc8255dbd43211ca6a941 100644 (file)
@@ -59,12 +59,12 @@ static int xen_add_device(struct device *dev)
                        add.flags = XEN_PCI_DEV_EXTFN;
 
 #ifdef CONFIG_ACPI
-               handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+               handle = ACPI_HANDLE(&pci_dev->dev);
                if (!handle && pci_dev->bus->bridge)
-                       handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+                       handle = ACPI_HANDLE(pci_dev->bus->bridge);
 #ifdef CONFIG_PCI_IOV
                if (!handle && pci_dev->is_virtfn)
-                       handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+                       handle = ACPI_HANDLE(physfn->bus->bridge);
 #endif
                if (handle) {
                        acpi_status status;
index f039b104a98e9093cadfb62b040a604628a76b7a..b03dd23feda8104d70536b514ec996fa612c5b89 100644 (file)
 #include "v9fs_vfs.h"
 #include "fid.h"
 
-/**
- * v9fs_dentry_delete - called when dentry refcount equals 0
- * @dentry:  dentry in question
- *
- * By returning 1 here we should remove cacheing of unused
- * dentry components.
- *
- */
-
-static int v9fs_dentry_delete(const struct dentry *dentry)
-{
-       p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
-                dentry->d_name.name, dentry);
-
-       return 1;
-}
-
 /**
  * v9fs_cached_dentry_delete - called when dentry refcount equals 0
  * @dentry:  dentry in question
@@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = {
 };
 
 const struct dentry_operations v9fs_dentry_operations = {
-       .d_delete = v9fs_dentry_delete,
+       .d_delete = always_delete_dentry,
        .d_release = v9fs_dentry_release,
 };
index 823efcbb6ccd1dc7936f77890183cee0ac93ed94..08159ed13649cacbec1825065e24b2b5b61be267 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -80,6 +80,8 @@ struct kioctx {
        struct percpu_ref       users;
        atomic_t                dead;
 
+       struct percpu_ref       reqs;
+
        unsigned long           user_id;
 
        struct __percpu kioctx_cpu *cpu;
@@ -107,7 +109,6 @@ struct kioctx {
        struct page             **ring_pages;
        long                    nr_pages;
 
-       struct rcu_head         rcu_head;
        struct work_struct      free_work;
 
        struct {
@@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx)
 
        put_aio_ring_file(ctx);
 
-       if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
+       if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
                kfree(ctx->ring_pages);
+               ctx->ring_pages = NULL;
+       }
 }
 
 static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -463,26 +466,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
        return cancel(kiocb);
 }
 
-static void free_ioctx_rcu(struct rcu_head *head)
+static void free_ioctx(struct work_struct *work)
 {
-       struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+       struct kioctx *ctx = container_of(work, struct kioctx, free_work);
 
+       pr_debug("freeing %p\n", ctx);
+
+       aio_free_ring(ctx);
        free_percpu(ctx->cpu);
        kmem_cache_free(kioctx_cachep, ctx);
 }
 
+static void free_ioctx_reqs(struct percpu_ref *ref)
+{
+       struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+
+       INIT_WORK(&ctx->free_work, free_ioctx);
+       schedule_work(&ctx->free_work);
+}
+
 /*
  * When this function runs, the kioctx has been removed from the "hash table"
  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  * now it's safe to cancel any that need to be.
  */
-static void free_ioctx(struct work_struct *work)
+static void free_ioctx_users(struct percpu_ref *ref)
 {
-       struct kioctx *ctx = container_of(work, struct kioctx, free_work);
-       struct aio_ring *ring;
+       struct kioctx *ctx = container_of(ref, struct kioctx, users);
        struct kiocb *req;
-       unsigned cpu, avail;
-       DEFINE_WAIT(wait);
 
        spin_lock_irq(&ctx->ctx_lock);
 
@@ -496,54 +507,8 @@ static void free_ioctx(struct work_struct *work)
 
        spin_unlock_irq(&ctx->ctx_lock);
 
-       for_each_possible_cpu(cpu) {
-               struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
-
-               atomic_add(kcpu->reqs_available, &ctx->reqs_available);
-               kcpu->reqs_available = 0;
-       }
-
-       while (1) {
-               prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE);
-
-               ring = kmap_atomic(ctx->ring_pages[0]);
-               avail = (ring->head <= ring->tail)
-                        ? ring->tail - ring->head
-                        : ctx->nr_events - ring->head + ring->tail;
-
-               atomic_add(avail, &ctx->reqs_available);
-               ring->head = ring->tail;
-               kunmap_atomic(ring);
-
-               if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)
-                       break;
-
-               schedule();
-       }
-       finish_wait(&ctx->wait, &wait);
-
-       WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1);
-
-       aio_free_ring(ctx);
-
-       pr_debug("freeing %p\n", ctx);
-
-       /*
-        * Here the call_rcu() is between the wait_event() for reqs_active to
-        * hit 0, and freeing the ioctx.
-        *
-        * aio_complete() decrements reqs_active, but it has to touch the ioctx
-        * after to issue a wakeup so we use rcu.
-        */
-       call_rcu(&ctx->rcu_head, free_ioctx_rcu);
-}
-
-static void free_ioctx_ref(struct percpu_ref *ref)
-{
-       struct kioctx *ctx = container_of(ref, struct kioctx, users);
-
-       INIT_WORK(&ctx->free_work, free_ioctx);
-       schedule_work(&ctx->free_work);
+       percpu_ref_kill(&ctx->reqs);
+       percpu_ref_put(&ctx->reqs);
 }
 
 static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
@@ -602,6 +567,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
        }
 }
 
+static void aio_nr_sub(unsigned nr)
+{
+       spin_lock(&aio_nr_lock);
+       if (WARN_ON(aio_nr - nr > aio_nr))
+               aio_nr = 0;
+       else
+               aio_nr -= nr;
+       spin_unlock(&aio_nr_lock);
+}
+
 /* ioctx_alloc
  *     Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
  */
@@ -639,8 +614,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
        ctx->max_reqs = nr_events;
 
-       if (percpu_ref_init(&ctx->users, free_ioctx_ref))
-               goto out_freectx;
+       if (percpu_ref_init(&ctx->users, free_ioctx_users))
+               goto err;
+
+       if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+               goto err;
 
        spin_lock_init(&ctx->ctx_lock);
        spin_lock_init(&ctx->completion_lock);
@@ -651,10 +629,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
        ctx->cpu = alloc_percpu(struct kioctx_cpu);
        if (!ctx->cpu)
-               goto out_freeref;
+               goto err;
 
        if (aio_setup_ring(ctx) < 0)
-               goto out_freepcpu;
+               goto err;
 
        atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
        ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
@@ -666,7 +644,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
        if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
            aio_nr + nr_events < aio_nr) {
                spin_unlock(&aio_nr_lock);
-               goto out_cleanup;
+               err = -EAGAIN;
+               goto err;
        }
        aio_nr += ctx->max_reqs;
        spin_unlock(&aio_nr_lock);
@@ -675,23 +654,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
        err = ioctx_add_table(ctx, mm);
        if (err)
-               goto out_cleanup_put;
+               goto err_cleanup;
 
        pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
                 ctx, ctx->user_id, mm, ctx->nr_events);
        return ctx;
 
-out_cleanup_put:
-       percpu_ref_put(&ctx->users);
-out_cleanup:
-       err = -EAGAIN;
-       aio_free_ring(ctx);
-out_freepcpu:
+err_cleanup:
+       aio_nr_sub(ctx->max_reqs);
+err:
        free_percpu(ctx->cpu);
-out_freeref:
+       free_percpu(ctx->reqs.pcpu_count);
        free_percpu(ctx->users.pcpu_count);
-out_freectx:
-       put_aio_ring_file(ctx);
        kmem_cache_free(kioctx_cachep, ctx);
        pr_debug("error allocating ioctx %d\n", err);
        return ERR_PTR(err);
@@ -726,10 +700,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
                 * -EAGAIN with no ioctxs actually in use (as far as userspace
                 *  could tell).
                 */
-               spin_lock(&aio_nr_lock);
-               BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
-               aio_nr -= ctx->max_reqs;
-               spin_unlock(&aio_nr_lock);
+               aio_nr_sub(ctx->max_reqs);
 
                if (ctx->mmap_size)
                        vm_munmap(ctx->mmap_base, ctx->mmap_size);
@@ -861,6 +832,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
        if (unlikely(!req))
                goto out_put;
 
+       percpu_ref_get(&ctx->reqs);
+
        req->ki_ctx = ctx;
        return req;
 out_put:
@@ -930,12 +903,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
                return;
        }
 
-       /*
-        * Take rcu_read_lock() in case the kioctx is being destroyed, as we
-        * need to issue a wakeup after incrementing reqs_available.
-        */
-       rcu_read_lock();
-
        if (iocb->ki_list.next) {
                unsigned long flags;
 
@@ -1010,7 +977,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
        if (waitqueue_active(&ctx->wait))
                wake_up(&ctx->wait);
 
-       rcu_read_unlock();
+       percpu_ref_put(&ctx->reqs);
 }
 EXPORT_SYMBOL(aio_complete);
 
@@ -1421,6 +1388,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        return 0;
 out_put_req:
        put_reqs_available(ctx, 1);
+       percpu_ref_put(&ctx->reqs);
        kiocb_free(req);
        return ret;
 }
index 2bdb4e25ee77db6c2a5c135b3c726eae734153b9..33d79a4eb92d6e623aa90e2291af39b2b2689d83 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
 
 static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
                          *page, unsigned int len, unsigned int offset,
-                         unsigned short max_sectors)
+                         unsigned int max_sectors)
 {
        int retried_segments = 0;
        struct bio_vec *bvec;
index f9d5094e102943db81708451a43312f6ac2e9525..aa976eced2d2ea8dfa9c0e97ea84da7438626d62 100644 (file)
@@ -9,12 +9,17 @@ config BTRFS_FS
        select XOR_BLOCKS
 
        help
-         Btrfs is a new filesystem with extents, writable snapshotting,
-         support for multiple devices and many more features.
+         Btrfs is a general purpose copy-on-write filesystem with extents,
+         writable snapshotting, support for multiple devices and many more
+         features focused on fault tolerance, repair and easy administration.
 
-         Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
-         FINALIZED.  You should say N here unless you are interested in
-         testing Btrfs with non-critical data.
+         The filesystem disk format is no longer unstable, and it's not
+         expected to change unless there are strong reasons to do so. If there
+         is a format change, file systems with an unchanged format will
+         continue to be mountable and usable by newer kernels.
+
+         For more information, please see the web pages at
+         http://btrfs.wiki.kernel.org.
 
          To compile this file system support as a module, choose M here. The
          module will be called btrfs.
index 8aec751fa464c126a7fd80e21b526692896d98c9..c1e0b0caf9cc975c2822cadf9aaaf0c1454dcf91 100644 (file)
@@ -495,6 +495,7 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
        spin_lock_irq(&workers->lock);
        if (workers->stopping) {
                spin_unlock_irq(&workers->lock);
+               ret = -EINVAL;
                goto fail_kthread;
        }
        list_add_tail(&worker->worker_list, &workers->idle_list);
index e0aab44569741342e8a197fcbdfe4d7e6fbd6481..b50764bef1410c2750b17d943ae3597899b3ee9e 100644 (file)
  * the integrity of (super)-block write requests, do not
  * enable the config option BTRFS_FS_CHECK_INTEGRITY to
  * include and compile the integrity check tool.
+ *
+ * Expect millions of lines of information in the kernel log with an
+ * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
+ * kernel config to at least 26 (which is 64MB). Usually the value is
+ * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
+ * changed like this before LOG_BUF_SHIFT can be set to a high value:
+ * config LOG_BUF_SHIFT
+ *       int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
+ *       range 12 30
  */
 
 #include <linux/sched.h>
 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE                    0x00000400
 #define BTRFSIC_PRINT_MASK_NUM_COPIES                          0x00000800
 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS               0x00001000
+#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE               0x00002000
 
 struct btrfsic_dev_state;
 struct btrfsic_state;
@@ -3015,6 +3025,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
            (rw & WRITE) && NULL != bio->bi_io_vec) {
                unsigned int i;
                u64 dev_bytenr;
+               u64 cur_bytenr;
                int bio_is_patched;
                char **mapped_datav;
 
@@ -3033,6 +3044,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
                                       GFP_NOFS);
                if (!mapped_datav)
                        goto leave;
+               cur_bytenr = dev_bytenr;
                for (i = 0; i < bio->bi_vcnt; i++) {
                        BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
                        mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
@@ -3044,16 +3056,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
                                kfree(mapped_datav);
                                goto leave;
                        }
-                       if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
-                            BTRFSIC_PRINT_MASK_VERBOSE) ==
-                           (dev_state->state->print_mask &
-                            (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
-                             BTRFSIC_PRINT_MASK_VERBOSE)))
+                       if (dev_state->state->print_mask &
+                           BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
                                printk(KERN_INFO
-                                      "#%u: page=%p, len=%u, offset=%u\n",
-                                      i, bio->bi_io_vec[i].bv_page,
-                                      bio->bi_io_vec[i].bv_len,
+                                      "#%u: bytenr=%llu, len=%u, offset=%u\n",
+                                      i, cur_bytenr, bio->bi_io_vec[i].bv_len,
                                       bio->bi_io_vec[i].bv_offset);
+                       cur_bytenr += bio->bi_io_vec[i].bv_len;
                }
                btrfsic_process_written_block(dev_state, dev_bytenr,
                                              mapped_datav, bio->bi_vcnt,
index f9aeb2759a646e7d3c258ce4ea3498e7c786e1ad..54ab86127f7af49500f6a0bf2bf4b63cd6074c40 100644 (file)
@@ -3613,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                           struct btrfs_ordered_sum *sums);
 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
                       struct bio *bio, u64 file_start, int contig);
-int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
-                       struct btrfs_root *root, struct btrfs_path *path,
-                       u64 isize);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
                             struct list_head *list, int search_commit);
 /* inode.c */
@@ -3744,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
 int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
 void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                             int skip_pinned);
-int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace,
-                              u64 start, u64 end, int skip_pinned,
-                              int modified);
 extern const struct file_operations btrfs_file_operations;
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root, struct inode *inode,
index 342f9fd411e3f5d79c15f675300ebcabad95738b..2cfc3dfff64f5708f71ec83af691b425b9f01b01 100644 (file)
@@ -366,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
        dev_replace->tgtdev = tgt_device;
 
        printk_in_rcu(KERN_INFO
-                     "btrfs: dev_replace from %s (devid %llu) to %s) started\n",
+                     "btrfs: dev_replace from %s (devid %llu) to %s started\n",
                      src_device->missing ? "<missing disk>" :
                        rcu_str_deref(src_device->name),
                      src_device->devid,
index 4c4ed0bb3da1bfc02a5dad41f66bdaf52c49aa71..8072cfa8a3b16c075e5c381f481e7cb874d9c531 100644 (file)
@@ -3517,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
 int btrfs_commit_super(struct btrfs_root *root)
 {
        struct btrfs_trans_handle *trans;
-       int ret;
 
        mutex_lock(&root->fs_info->cleaner_mutex);
        btrfs_run_delayed_iputs(root);
@@ -3531,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root)
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
-       ret = btrfs_commit_transaction(trans, root);
-       if (ret)
-               return ret;
-       /* run commit again to drop the original snapshot */
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
-       ret = btrfs_commit_transaction(trans, root);
-       if (ret)
-               return ret;
-       ret = btrfs_write_and_wait_transaction(NULL, root);
-       if (ret) {
-               btrfs_error(root->fs_info, ret,
-                           "Failed to sync btree inode to disk.");
-               return ret;
-       }
-
-       ret = write_ctree_super(NULL, root, 0);
-       return ret;
+       return btrfs_commit_transaction(trans, root);
 }
 
 int close_ctree(struct btrfs_root *root)
index 856bc2b2192cb8c2c86f23676abf36cc3cb361fe..8e457fca0a0ba5c04afb84414ccd640821a640d1 100644 (file)
@@ -1980,6 +1980,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
        int ret;
 
+       ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
        BUG_ON(!mirror_num);
 
        /* we can't repair anything in raid56 yet */
@@ -2036,6 +2037,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
        unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
        int ret = 0;
 
+       if (root->fs_info->sb->s_flags & MS_RDONLY)
+               return -EROFS;
+
        for (i = 0; i < num_pages; i++) {
                struct page *p = extent_buffer_page(eb, i);
                ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE,
@@ -2057,12 +2061,12 @@ static int clean_io_failure(u64 start, struct page *page)
        u64 private;
        u64 private_failure;
        struct io_failure_record *failrec;
-       struct btrfs_fs_info *fs_info;
+       struct inode *inode = page->mapping->host;
+       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
        struct extent_state *state;
        int num_copies;
        int did_repair = 0;
        int ret;
-       struct inode *inode = page->mapping->host;
 
        private = 0;
        ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
@@ -2085,6 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page)
                did_repair = 1;
                goto out;
        }
+       if (fs_info->sb->s_flags & MS_RDONLY)
+               goto out;
 
        spin_lock(&BTRFS_I(inode)->io_tree.lock);
        state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
@@ -2094,7 +2100,6 @@ static int clean_io_failure(u64 start, struct page *page)
 
        if (state && state->start <= failrec->start &&
            state->end >= failrec->start + failrec->len - 1) {
-               fs_info = BTRFS_I(inode)->root->fs_info;
                num_copies = btrfs_num_copies(fs_info, failrec->logical,
                                              failrec->len);
                if (num_copies > 1)  {
index da8d2f696ac5c461154046e985dcd8dfc5c22b29..f1a77449d032b1fbd481eb16cd232653e5bfbefa 100644 (file)
@@ -2129,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
                                                  old->extent_offset, fs_info,
                                                  path, record_one_backref,
                                                  old);
-               BUG_ON(ret < 0 && ret != -ENOENT);
+               if (ret < 0 && ret != -ENOENT)
+                       return false;
 
                /* no backref to be processed for this extent */
                if (!old->count) {
@@ -6186,8 +6187,7 @@ insert:
        write_unlock(&em_tree->lock);
 out:
 
-       if (em)
-               trace_btrfs_get_extent(root, em);
+       trace_btrfs_get_extent(root, em);
 
        if (path)
                btrfs_free_path(path);
index 25a8f3812f14e0410a95902d2985455313cb1e6f..69582d5b69d1f6064a77a409760a3ba1886b6d92 100644 (file)
@@ -638,6 +638,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
                        WARN_ON(nr < 0);
                }
        }
+       list_splice_tail(&splice, &fs_info->ordered_roots);
        spin_unlock(&fs_info->ordered_root_lock);
 }
 
@@ -803,7 +804,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
                        btrfs_put_ordered_extent(ordered);
                        break;
                }
-               if (ordered->file_offset + ordered->len < start) {
+               if (ordered->file_offset + ordered->len <= start) {
                        btrfs_put_ordered_extent(ordered);
                        break;
                }
index 2544805544f0baf137c33de162c46f14b5ddf002..561e2f16ba3e3ff3b0be72b12b4a052b86082d2a 100644 (file)
@@ -938,8 +938,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
                                BTRFS_DEV_STAT_CORRUPTION_ERRS);
        }
 
-       if (sctx->readonly && !sctx->is_dev_replace)
-               goto did_not_correct_error;
+       if (sctx->readonly) {
+               ASSERT(!sctx->is_dev_replace);
+               goto out;
+       }
 
        if (!is_metadata && !have_csum) {
                struct scrub_fixup_nodatasum *fixup_nodatasum;
index 57c16b46afbd353b8fdcc22021ae8e00aca0d040..c6a872a8a46862948e93c343cdd0c7479caf3883 100644 (file)
@@ -1480,7 +1480,7 @@ static void do_async_commit(struct work_struct *work)
         * We've got freeze protection passed with the transaction.
         * Tell lockdep about it.
         */
-       if (ac->newtrans->type < TRANS_JOIN_NOLOCK)
+       if (ac->newtrans->type & __TRANS_FREEZABLE)
                rwsem_acquire_read(
                     &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
                     0, 1, _THIS_IP_);
@@ -1521,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
         * Tell lockdep we've released the freeze rwsem, since the
         * async commit thread will be the one to unlock it.
         */
-       if (trans->type < TRANS_JOIN_NOLOCK)
+       if (ac->newtrans->type & __TRANS_FREEZABLE)
                rwsem_release(
                        &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
                        1, _THIS_IP_);
index 744553c83fe2ad79a5b07715167e6d887373bc8f..9f7fc51ca334864b72336e127d786047dfb1f5de 100644 (file)
@@ -3697,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                        ret = btrfs_truncate_inode_items(trans, log,
                                                         inode, 0, 0);
                } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                             &BTRFS_I(inode)->runtime_flags)) {
+                                             &BTRFS_I(inode)->runtime_flags) ||
+                          inode_only == LOG_INODE_EXISTS) {
                        if (inode_only == LOG_INODE_ALL)
                                fast_search = true;
                        max_key.type = BTRFS_XATTR_ITEM_KEY;
@@ -3801,7 +3802,7 @@ log_extents:
                        err = ret;
                        goto out_unlock;
                }
-       } else {
+       } else if (inode_only == LOG_INODE_ALL) {
                struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
                struct extent_map *em, *n;
 
index 0db63709786291731aa3e465dd20c46830890311..92303f42baaa92d5d845edddff1f8600fc46518e 100644 (file)
@@ -5394,7 +5394,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
 {
        struct bio_vec *prev;
        struct request_queue *q = bdev_get_queue(bdev);
-       unsigned short max_sectors = queue_max_sectors(q);
+       unsigned int max_sectors = queue_max_sectors(q);
        struct bvec_merge_data bvm = {
                .bi_bdev = bdev,
                .bi_sector = sector,
index 277bd1be21fd70061fcd8d6694ed65ab4a34abd3..e081acbac2e756372340379fea7a69a84c9c3dd0 100644 (file)
@@ -56,29 +56,28 @@ static void configfs_d_iput(struct dentry * dentry,
        struct configfs_dirent *sd = dentry->d_fsdata;
 
        if (sd) {
-               BUG_ON(sd->s_dentry != dentry);
                /* Coordinate with configfs_readdir */
                spin_lock(&configfs_dirent_lock);
-               sd->s_dentry = NULL;
+               /* Coordinate with configfs_attach_attr, which increases
+                * sd->s_count and points sd->s_dentry at the newly allocated
+                * dentry.  Only set sd->s_dentry to NULL when this dentry is
+                * the only sd owner.
+                * Otherwise, configfs_d_iput may run just after
+                * configfs_attach_attr and set sd->s_dentry to NULL
+                * even though it is still in use.
+                */
+               if (atomic_read(&sd->s_count) <= 2)
+                       sd->s_dentry = NULL;
+
                spin_unlock(&configfs_dirent_lock);
                configfs_put(sd);
        }
        iput(inode);
 }
 
-/*
- * We _must_ delete our dentries on last dput, as the chain-to-parent
- * behavior is required to clear the parents of default_groups.
- */
-static int configfs_d_delete(const struct dentry *dentry)
-{
-       return 1;
-}
-
 const struct dentry_operations configfs_dentry_ops = {
        .d_iput         = configfs_d_iput,
-       /* simple_delete_dentry() isn't exported */
-       .d_delete       = configfs_d_delete,
+       .d_delete       = always_delete_dentry,
 };
 
 #ifdef CONFIG_LOCKDEP
@@ -426,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
        struct configfs_attribute * attr = sd->s_element;
        int error;
 
+       spin_lock(&configfs_dirent_lock);
        dentry->d_fsdata = configfs_get(sd);
        sd->s_dentry = dentry;
+       spin_unlock(&configfs_dirent_lock);
+
        error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
                                configfs_init_file);
        if (error) {
index 62406b6959b63389bd503cf6db4ea528266b79a8..bc3fbcd32558fd61823b126997ace2785d7bac21 100644 (file)
@@ -695,7 +695,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
        while (nr) {
                if (dump_interrupted())
                        return 0;
-               n = vfs_write(file, addr, nr, &pos);
+               n = __kernel_write(file, addr, nr, &pos);
                if (n <= 0)
                        return 0;
                file->f_pos = pos;
@@ -733,7 +733,7 @@ int dump_align(struct coredump_params *cprm, int align)
 {
        unsigned mod = cprm->written & (align - 1);
        if (align & (align - 1))
-               return -EINVAL;
-       return mod ? dump_skip(cprm, align - mod) : 0;
+               return 0;
+       return mod ? dump_skip(cprm, align - mod) : 1;
 }
 EXPORT_SYMBOL(dump_align);
index 0a38ef8d7f0088579089d101c99a0e12193f22f5..4bdb300b16e2e940bab8eeb07417b7f87914815f 100644 (file)
@@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock);
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
-/**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
- *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
- */
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
-       if (!(*seq & 1))        /* Even */
-               *seq = read_seqbegin(lock);
-       else                    /* Odd */
-               read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
-       return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
-       if (seq & 1)
-               read_sequnlock_excl(lock);
-}
-
 /*
  * This is the single most critical data structure when it comes
  * to the dcache: the hashtable for lookups. Somebody should try
@@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq)
  * This hash-function tries to avoid losing too many bits of hash
  * information, yet avoid using a prime hash-size or similar.
  */
-#define D_HASHBITS     d_hash_shift
-#define D_HASHMASK     d_hash_mask
 
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
@@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
                                        unsigned int hash)
 {
        hash += (unsigned long) parent / L1_CACHE_BYTES;
-       hash = hash + (hash >> D_HASHBITS);
-       return dentry_hashtable + (hash & D_HASHMASK);
+       hash = hash + (hash >> d_hash_shift);
+       return dentry_hashtable + (hash & d_hash_mask);
 }
 
 /* Statistics gathering. */
@@ -469,7 +438,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 {
        list_del(&dentry->d_u.d_child);
        /*
-        * Inform try_to_ascend() that we are no longer attached to the
+        * Inform d_walk() that we are no longer attached to the
         * dentry tree
         */
        dentry->d_flags |= DCACHE_DENTRY_KILLED;
@@ -1069,34 +1038,6 @@ void shrink_dcache_sb(struct super_block *sb)
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
-/*
- * This tries to ascend one level of parenthood, but
- * we can race with renaming, so we need to re-check
- * the parenthood after dropping the lock and check
- * that the sequence number still matches.
- */
-static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
-{
-       struct dentry *new = old->d_parent;
-
-       rcu_read_lock();
-       spin_unlock(&old->d_lock);
-       spin_lock(&new->d_lock);
-
-       /*
-        * might go back up the wrong parent if we have had a rename
-        * or deletion
-        */
-       if (new != old->d_parent ||
-                (old->d_flags & DCACHE_DENTRY_KILLED) ||
-                need_seqretry(&rename_lock, seq)) {
-               spin_unlock(&new->d_lock);
-               new = NULL;
-       }
-       rcu_read_unlock();
-       return new;
-}
-
 /**
  * enum d_walk_ret - action to talke during tree walk
  * @D_WALK_CONTINUE:   contrinue walk
@@ -1185,9 +1126,24 @@ resume:
         */
        if (this_parent != parent) {
                struct dentry *child = this_parent;
-               this_parent = try_to_ascend(this_parent, seq);
-               if (!this_parent)
+               this_parent = child->d_parent;
+
+               rcu_read_lock();
+               spin_unlock(&child->d_lock);
+               spin_lock(&this_parent->d_lock);
+
+               /*
+                * might go back up the wrong parent if we have had a rename
+                * or deletion
+                */
+               if (this_parent != child->d_parent ||
+                        (child->d_flags & DCACHE_DENTRY_KILLED) ||
+                        need_seqretry(&rename_lock, seq)) {
+                       spin_unlock(&this_parent->d_lock);
+                       rcu_read_unlock();
                        goto rename_retry;
+               }
+               rcu_read_unlock();
                next = child->d_u.d_child.next;
                goto resume;
        }
index a8766b880c0783b3adca66bac0e5f0ba76840c70..becc725a195308edfcd518572f1f8784052b1bb6 100644 (file)
@@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr)
        return 0;
 }
 
-/*
- * Retaining negative dentries for an in-memory filesystem just wastes
- * memory and lookup time: arrange for them to be deleted immediately.
- */
-static int efivarfs_delete_dentry(const struct dentry *dentry)
-{
-       return 1;
-}
-
 static struct dentry_operations efivarfs_d_ops = {
        .d_compare = efivarfs_d_compare,
        .d_hash = efivarfs_d_hash,
-       .d_delete = efivarfs_delete_dentry,
+       .d_delete = always_delete_dentry,
 };
 
 static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
index 977319fd77f39de88ef66979d1de95f7d846f9a6..7ea097f6b341f06982f3ea3b068de5755b1605e0 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1380,10 +1380,6 @@ int search_binary_handler(struct linux_binprm *bprm)
        if (retval)
                return retval;
 
-       retval = audit_bprm(bprm);
-       if (retval)
-               return retval;
-
        retval = -ENOENT;
  retry:
        read_lock(&binfmt_lock);
@@ -1431,6 +1427,7 @@ static int exec_binprm(struct linux_binprm *bprm)
 
        ret = search_binary_handler(bprm);
        if (ret >= 0) {
+               audit_bprm(bprm);
                trace_sched_process_exec(current, old_pid, bprm);
                ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
                current->did_exec = 1;
index e66a8009aff16d66b1179bba0ffc3a266ff923f6..c8420f7e4db604da3663da61c5ae4556b6439783 100644 (file)
@@ -1899,7 +1899,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
                        gi->nhash = 0;
                }
        /* Skip entries for other sb and dead entries */
-       } while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref));
+       } while (gi->sdp != gi->gl->gl_sbd ||
+                __lockref_is_dead(&gi->gl->gl_lockref));
 
        return 0;
 }
index 1615df16cf4eb9ed5c5c4f20ee46c56bc00d3143..7119504159f17ba8fdde8317abee9eaf38583c71 100644 (file)
@@ -1171,8 +1171,11 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
        if (d != NULL)
                dentry = d;
        if (dentry->d_inode) {
-               if (!(*opened & FILE_OPENED))
+               if (!(*opened & FILE_OPENED)) {
+                       if (d == NULL)
+                               dget(dentry);
                        return finish_no_open(file, dentry);
+               }
                dput(d);
                return 0;
        }
index c8423d6de6c3ee54341d5ea5c4eb7e20ca7255de..2a6ba06bee6fca0ffada9c155ca243466fcf1f62 100644 (file)
@@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl)
 static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
                             char *lvb_bits)
 {
-       uint32_t gen;
+       __le32 gen;
        memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
-       memcpy(&gen, lvb_bits, sizeof(uint32_t));
+       memcpy(&gen, lvb_bits, sizeof(__le32));
        *lvb_gen = le32_to_cpu(gen);
 }
 
 static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
                              char *lvb_bits)
 {
-       uint32_t gen;
+       __le32 gen;
        memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
        gen = cpu_to_le32(lvb_gen);
-       memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
+       memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
 }
 
 static int all_jid_bits_clear(char *lvb)
index 453b50eaddec42f8e8461d9f96965e7d40ba746f..98236d0df3cae7ce7666a10dc7fc907590b873b0 100644 (file)
@@ -667,7 +667,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
        struct buffer_head *bh;
        struct page *page;
        void *kaddr, *ptr;
-       struct gfs2_quota q, *qp;
+       struct gfs2_quota q;
        int err, nbytes;
        u64 size;
 
@@ -683,28 +683,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
                return err;
 
        err = -EIO;
-       qp = &q;
-       qp->qu_value = be64_to_cpu(qp->qu_value);
-       qp->qu_value += change;
-       qp->qu_value = cpu_to_be64(qp->qu_value);
-       qd->qd_qb.qb_value = qp->qu_value;
+       be64_add_cpu(&q.qu_value, change);
+       qd->qd_qb.qb_value = q.qu_value;
        if (fdq) {
                if (fdq->d_fieldmask & FS_DQ_BSOFT) {
-                       qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
-                       qd->qd_qb.qb_warn = qp->qu_warn;
+                       q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
+                       qd->qd_qb.qb_warn = q.qu_warn;
                }
                if (fdq->d_fieldmask & FS_DQ_BHARD) {
-                       qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
-                       qd->qd_qb.qb_limit = qp->qu_limit;
+                       q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
+                       qd->qd_qb.qb_limit = q.qu_limit;
                }
                if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
-                       qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
-                       qd->qd_qb.qb_value = qp->qu_value;
+                       q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
+                       qd->qd_qb.qb_value = q.qu_value;
                }
        }
 
        /* Write the quota into the quota file on disk */
-       ptr = qp;
+       ptr = &q;
        nbytes = sizeof(struct gfs2_quota);
 get_a_page:
        page = find_or_create_page(mapping, index, GFP_NOFS);
index 4d83abdd5635273b3e0af9589eec83226be249ff..c8d6161bd682bd6cbd05247e2f0efc8274afe0b9 100644 (file)
@@ -1127,7 +1127,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
                rgd->rd_free_clone = rgd->rd_free;
        }
-       if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+       if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
                rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
                                     rgd->rd_bits[0].bi_bh->b_data);
@@ -1161,7 +1161,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
        if (rgd->rd_flags & GFS2_RDF_UPTODATE)
                return 0;
 
-       if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+       if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
                return gfs2_rgrp_bh_get(rgd);
 
        rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
index 25437280a2071b8970efe6e394edb97a4433acd8..db23ce1bd9031028390eb33016a05783950bc379 100644 (file)
@@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
 
 #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
 
-static int hostfs_d_delete(const struct dentry *dentry)
-{
-       return 1;
-}
-
-static const struct dentry_operations hostfs_dentry_ops = {
-       .d_delete               = hostfs_d_delete,
-};
-
 /* Changed in hostfs_args before the kernel starts running */
 static char *root_ino = "";
 static int append = 0;
@@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
        sb->s_blocksize_bits = 10;
        sb->s_magic = HOSTFS_SUPER_MAGIC;
        sb->s_op = &hostfs_sbops;
-       sb->s_d_op = &hostfs_dentry_ops;
+       sb->s_d_op = &simple_dentry_operations;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
 
        /* NULL is printed as <NULL> by sprintf: avoid that. */
index 5de06947ba5ebf2e98caa8177085f7c191df4602..a1844244246f8e8e8da60da61e2aaabd2750e58e 100644 (file)
@@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs);
  * Retaining negative dentries for an in-memory filesystem just wastes
  * memory and lookup time: arrange for them to be deleted immediately.
  */
-static int simple_delete_dentry(const struct dentry *dentry)
+int always_delete_dentry(const struct dentry *dentry)
 {
        return 1;
 }
+EXPORT_SYMBOL(always_delete_dentry);
+
+const struct dentry_operations simple_dentry_operations = {
+       .d_delete = always_delete_dentry,
+};
+EXPORT_SYMBOL(simple_dentry_operations);
 
 /*
  * Lookup the data. This is trivial - if the dentry didn't already
@@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry)
  */
 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
-       static const struct dentry_operations simple_dentry_operations = {
-               .d_delete = simple_delete_dentry,
-       };
-
        if (dentry->d_name.len > NAME_MAX)
                return ERR_PTR(-ENAMETOOLONG);
        if (!dentry->d_sb->s_d_op)
index e029a4cbff7db7b23af15628ca4d8c2cac5da491..8f77a8cea289350b9d0e427b284cc01a2df4691d 100644 (file)
@@ -2435,6 +2435,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
  */
 static inline int may_create(struct inode *dir, struct dentry *child)
 {
+       audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
        if (child->d_inode)
                return -EEXIST;
        if (IS_DEADDIR(dir))
index 088de1355e930c05fd9dd9c03bb3710b7531fed7..ee7237f99f54cd413dba6375dbc344084d0ece56 100644 (file)
@@ -141,8 +141,8 @@ xdr_error:                                  \
 
 static void next_decode_page(struct nfsd4_compoundargs *argp)
 {
-       argp->pagelist++;
        argp->p = page_address(argp->pagelist[0]);
+       argp->pagelist++;
        if (argp->pagelen < PAGE_SIZE) {
                argp->end = argp->p + (argp->pagelen>>2);
                argp->pagelen = 0;
@@ -1229,6 +1229,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
                len -= pages * PAGE_SIZE;
 
                argp->p = (__be32 *)page_address(argp->pagelist[0]);
+               argp->pagelist++;
                argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
        }
        argp->p += XDR_QUADLEN(len);
index 94b5f5d2bfedd94be68e7879a8cf8b40d0e3cde3..7eea63cada1d4a3ea5f9dd95401f83714689fcb0 100644 (file)
@@ -298,41 +298,12 @@ commit_metadata(struct svc_fh *fhp)
 }
 
 /*
- * Set various file attributes.
- * N.B. After this call fhp needs an fh_put
+ * Go over the attributes and take care of the small differences between
+ * NFS semantics and what Linux expects.
  */
-__be32
-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
-            int check_guard, time_t guardtime)
+static void
+nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
 {
-       struct dentry   *dentry;
-       struct inode    *inode;
-       int             accmode = NFSD_MAY_SATTR;
-       umode_t         ftype = 0;
-       __be32          err;
-       int             host_err;
-       int             size_change = 0;
-
-       if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
-               accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
-       if (iap->ia_valid & ATTR_SIZE)
-               ftype = S_IFREG;
-
-       /* Get inode */
-       err = fh_verify(rqstp, fhp, ftype, accmode);
-       if (err)
-               goto out;
-
-       dentry = fhp->fh_dentry;
-       inode = dentry->d_inode;
-
-       /* Ignore any mode updates on symlinks */
-       if (S_ISLNK(inode->i_mode))
-               iap->ia_valid &= ~ATTR_MODE;
-
-       if (!iap->ia_valid)
-               goto out;
-
        /*
         * NFSv2 does not differentiate between "set-[ac]time-to-now"
         * which only requires access, and "set-[ac]time-to-X" which
@@ -342,8 +313,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
         * convert to "set to now" instead of "set to explicit time"
         *
         * We only call inode_change_ok as the last test as technically
-        * it is not an interface that we should be using.  It is only
-        * valid if the filesystem does not define it's own i_op->setattr.
+        * it is not an interface that we should be using.
         */
 #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
 #define        MAX_TOUCH_TIME_ERROR (30*60)
@@ -369,30 +339,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                        iap->ia_valid &= ~BOTH_TIME_SET;
                }
        }
-           
-       /*
-        * The size case is special.
-        * It changes the file as well as the attributes.
-        */
-       if (iap->ia_valid & ATTR_SIZE) {
-               if (iap->ia_size < inode->i_size) {
-                       err = nfsd_permission(rqstp, fhp->fh_export, dentry,
-                                       NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
-                       if (err)
-                               goto out;
-               }
-
-               host_err = get_write_access(inode);
-               if (host_err)
-                       goto out_nfserr;
-
-               size_change = 1;
-               host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
-               if (host_err) {
-                       put_write_access(inode);
-                       goto out_nfserr;
-               }
-       }
 
        /* sanitize the mode change */
        if (iap->ia_valid & ATTR_MODE) {
@@ -415,32 +361,111 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                        iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
                }
        }
+}
 
-       /* Change the attributes. */
+static __be32
+nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
+               struct iattr *iap)
+{
+       struct inode *inode = fhp->fh_dentry->d_inode;
+       int host_err;
 
-       iap->ia_valid |= ATTR_CTIME;
+       if (iap->ia_size < inode->i_size) {
+               __be32 err;
 
-       err = nfserr_notsync;
-       if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
-               host_err = nfsd_break_lease(inode);
-               if (host_err)
-                       goto out_nfserr;
-               fh_lock(fhp);
+               err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+                               NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
+               if (err)
+                       return err;
+       }
 
-               host_err = notify_change(dentry, iap, NULL);
-               err = nfserrno(host_err);
-               fh_unlock(fhp);
+       host_err = get_write_access(inode);
+       if (host_err)
+               goto out_nfserrno;
+
+       host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
+       if (host_err)
+               goto out_put_write_access;
+       return 0;
+
+out_put_write_access:
+       put_write_access(inode);
+out_nfserrno:
+       return nfserrno(host_err);
+}
+
+/*
+ * Set various file attributes.  After this call fhp needs an fh_put.
+ */
+__be32
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+            int check_guard, time_t guardtime)
+{
+       struct dentry   *dentry;
+       struct inode    *inode;
+       int             accmode = NFSD_MAY_SATTR;
+       umode_t         ftype = 0;
+       __be32          err;
+       int             host_err;
+       int             size_change = 0;
+
+       if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+               accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
+       if (iap->ia_valid & ATTR_SIZE)
+               ftype = S_IFREG;
+
+       /* Get inode */
+       err = fh_verify(rqstp, fhp, ftype, accmode);
+       if (err)
+               goto out;
+
+       dentry = fhp->fh_dentry;
+       inode = dentry->d_inode;
+
+       /* Ignore any mode updates on symlinks */
+       if (S_ISLNK(inode->i_mode))
+               iap->ia_valid &= ~ATTR_MODE;
+
+       if (!iap->ia_valid)
+               goto out;
+
+       nfsd_sanitize_attrs(inode, iap);
+
+       /*
+        * The size case is special, it changes the file in addition to the
+        * attributes.
+        */
+       if (iap->ia_valid & ATTR_SIZE) {
+               err = nfsd_get_write_access(rqstp, fhp, iap);
+               if (err)
+                       goto out;
+               size_change = 1;
        }
+
+       iap->ia_valid |= ATTR_CTIME;
+
+       if (check_guard && guardtime != inode->i_ctime.tv_sec) {
+               err = nfserr_notsync;
+               goto out_put_write_access;
+       }
+
+       host_err = nfsd_break_lease(inode);
+       if (host_err)
+               goto out_put_write_access_nfserror;
+
+       fh_lock(fhp);
+       host_err = notify_change(dentry, iap, NULL);
+       fh_unlock(fhp);
+
+out_put_write_access_nfserror:
+       err = nfserrno(host_err);
+out_put_write_access:
        if (size_change)
                put_write_access(inode);
        if (!err)
                commit_metadata(fhp);
 out:
        return err;
-
-out_nfserr:
-       err = nfserrno(host_err);
-       goto out;
 }
 
 #if defined(CONFIG_NFSD_V2_ACL) || \
index 1485e38daaa38100278f56e710a8233338693fd5..03c8d747be48be2a14e93fed94355d42c5d7bd52 100644 (file)
@@ -1151,10 +1151,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
                goto out_free_page;
 
        }
-       kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
-       if (!uid_valid(kloginuid)) {
-               length = -EINVAL;
-               goto out_free_page;
+
+       /* is userspace trying to explicitly UNSET the loginuid? */
+       if (loginuid == AUDIT_UID_UNSET) {
+               kloginuid = INVALID_UID;
+       } else {
+               kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
+               if (!uid_valid(kloginuid)) {
+                       length = -EINVAL;
+                       goto out_free_page;
+               }
        }
 
        length = audit_set_loginuid(kloginuid);
index 737e15615b0490c40d002a315217033f5463d5b3..cca93b6fb9a9e841cc480308968771125fb7cd87 100644 (file)
@@ -174,22 +174,6 @@ static const struct inode_operations proc_link_inode_operations = {
        .follow_link    = proc_follow_link,
 };
 
-/*
- * As some entries in /proc are volatile, we want to 
- * get rid of unused dentries.  This could be made 
- * smarter: we could keep a "volatile" flag in the 
- * inode to indicate which ones to keep.
- */
-static int proc_delete_dentry(const struct dentry * dentry)
-{
-       return 1;
-}
-
-static const struct dentry_operations proc_dentry_operations =
-{
-       .d_delete       = proc_delete_dentry,
-};
-
 /*
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
@@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
                        inode = proc_get_inode(dir->i_sb, de);
                        if (!inode)
                                return ERR_PTR(-ENOMEM);
-                       d_set_d_op(dentry, &proc_dentry_operations);
+                       d_set_d_op(dentry, &simple_dentry_operations);
                        d_add(dentry, inode);
                        return NULL;
                }
index 49a7fff2e83a9906f39a685ddad0e98211dd155f..9ae46b87470dd9fe9fe6962c689abb4a7500e697 100644 (file)
@@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = {
        .setattr        = proc_setattr,
 };
 
-static int ns_delete_dentry(const struct dentry *dentry)
-{
-       /* Don't cache namespace inodes when not in use */
-       return 1;
-}
-
 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
 {
        struct inode *inode = dentry->d_inode;
@@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
 
 const struct dentry_operations ns_dentry_operations =
 {
-       .d_delete       = ns_delete_dentry,
+       .d_delete       = always_delete_dentry,
        .d_dname        = ns_dname,
 };
 
index c70111ebefd44aaacf7248e10a5e80fecc06582a..b6fa8657dcbc51dcb904a7e5b018d474c94d9d83 100644 (file)
@@ -25,6 +25,78 @@ config SQUASHFS
 
          If unsure, say N.
 
+choice
+       prompt "File decompression options"
+       depends on SQUASHFS
+       help
+         Squashfs now supports two options for decompressing file
+         data.  Traditionally Squashfs has decompressed into an
+         intermediate buffer and then memcopied it into the page cache.
+         Squashfs now supports the ability to decompress directly into
+         the page cache.
+
+         If unsure, select "Decompress file data into an intermediate buffer"
+
+config SQUASHFS_FILE_CACHE
+       bool "Decompress file data into an intermediate buffer"
+       help
+         Decompress file data into an intermediate buffer and then
+         memcopy it into the page cache.
+
+config SQUASHFS_FILE_DIRECT
+       bool "Decompress files directly into the page cache"
+       help
+         Directly decompress file data into the page cache.
+         Doing so can significantly improve performance because
+         it eliminates a memcpy and it also removes the lock contention
+         on the single buffer.
+
+endchoice
+
+choice
+       prompt "Decompressor parallelisation options"
+       depends on SQUASHFS
+       help
+         Squashfs now supports three parallelisation options for
+         decompression.  Each one exhibits various trade-offs between
+         decompression performance and CPU and memory usage.
+
+         If in doubt, select "Single threaded compression"
+
+config SQUASHFS_DECOMP_SINGLE
+       bool "Single threaded compression"
+       help
+         Traditionally Squashfs has used single-threaded decompression.
+         Only one block (data or metadata) can be decompressed at any
+         one time.  This limits CPU and memory usage to a minimum.
+
+config SQUASHFS_DECOMP_MULTI
+       bool "Use multiple decompressors for parallel I/O"
+       help
+         By default Squashfs uses a single decompressor but it gives
+         poor performance on parallel I/O workloads when using multiple CPU
+         machines due to waiting on decompressor availability.
+
+         If you have a parallel I/O workload and your system has enough memory,
+         using this option may improve overall I/O performance.
+
+         This decompressor implementation uses up to two parallel
+         decompressors per core.  It dynamically allocates decompressors
+         on a demand basis.
+
+config SQUASHFS_DECOMP_MULTI_PERCPU
+       bool "Use percpu multiple decompressors for parallel I/O"
+       help
+         By default Squashfs uses a single decompressor but it gives
+         poor performance on parallel I/O workloads when using multiple CPU
+         machines due to waiting on decompressor availability.
+
+         This decompressor implementation uses a maximum of one
+         decompressor per core.  It uses percpu variables to ensure
+         decompression is load-balanced across the cores.
+
+endchoice
+
 config SQUASHFS_XATTR
        bool "Squashfs XATTR support"
        depends on SQUASHFS
index 110b0476f3b48a21e016dd8f4337476ddf37c3ce..4132520b4ff2cfbec2e49b5c0ebe67f8661dd748 100644 (file)
@@ -5,6 +5,11 @@
 obj-$(CONFIG_SQUASHFS) += squashfs.o
 squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
 squashfs-y += namei.o super.o symlink.o decompressor.o
+squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
+squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
 squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
 squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
 squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
index 41d108ecc9be305211a635bfdf7932ac52d91097..0cea9b9236d07c81d0cc46c0b22aeba334cc645d 100644 (file)
@@ -36,6 +36,7 @@
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
 #include "decompressor.h"
+#include "page_actor.h"
 
 /*
  * Read the metadata block length, this is stored in the first two
@@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb,
  * generated a larger block - this does occasionally happen with compression
  * algorithms).
  */
-int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
-                       int length, u64 *next_index, int srclength, int pages)
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+               u64 *next_index, struct squashfs_page_actor *output)
 {
        struct squashfs_sb_info *msblk = sb->s_fs_info;
        struct buffer_head **bh;
        int offset = index & ((1 << msblk->devblksize_log2) - 1);
        u64 cur_index = index >> msblk->devblksize_log2;
-       int bytes, compressed, b = 0, k = 0, page = 0, avail;
+       int bytes, compressed, b = 0, k = 0, avail, i;
 
-       bh = kcalloc(((srclength + msblk->devblksize - 1)
+       bh = kcalloc(((output->length + msblk->devblksize - 1)
                >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
        if (bh == NULL)
                return -ENOMEM;
@@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                        *next_index = index + length;
 
                TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
-                       index, compressed ? "" : "un", length, srclength);
+                       index, compressed ? "" : "un", length, output->length);
 
-               if (length < 0 || length > srclength ||
+               if (length < 0 || length > output->length ||
                                (index + length) > msblk->bytes_used)
                        goto read_failure;
 
@@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
                                compressed ? "" : "un", length);
 
-               if (length < 0 || length > srclength ||
+               if (length < 0 || length > output->length ||
                                        (index + length) > msblk->bytes_used)
                        goto block_release;
 
@@ -158,9 +159,15 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                ll_rw_block(READ, b - 1, bh + 1);
        }
 
+       for (i = 0; i < b; i++) {
+               wait_on_buffer(bh[i]);
+               if (!buffer_uptodate(bh[i]))
+                       goto block_release;
+       }
+
        if (compressed) {
-               length = squashfs_decompress(msblk, buffer, bh, b, offset,
-                        length, srclength, pages);
+               length = squashfs_decompress(msblk, bh, b, offset, length,
+                       output);
                if (length < 0)
                        goto read_failure;
        } else {
@@ -168,22 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                 * Block is uncompressed.
                 */
                int in, pg_offset = 0;
+               void *data = squashfs_first_page(output);
 
                for (bytes = length; k < b; k++) {
                        in = min(bytes, msblk->devblksize - offset);
                        bytes -= in;
-                       wait_on_buffer(bh[k]);
-                       if (!buffer_uptodate(bh[k]))
-                               goto block_release;
                        while (in) {
                                if (pg_offset == PAGE_CACHE_SIZE) {
-                                       page++;
+                                       data = squashfs_next_page(output);
                                        pg_offset = 0;
                                }
                                avail = min_t(int, in, PAGE_CACHE_SIZE -
                                                pg_offset);
-                               memcpy(buffer[page] + pg_offset,
-                                               bh[k]->b_data + offset, avail);
+                               memcpy(data + pg_offset, bh[k]->b_data + offset,
+                                               avail);
                                in -= avail;
                                pg_offset += avail;
                                offset += avail;
@@ -191,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                        offset = 0;
                        put_bh(bh[k]);
                }
+               squashfs_finish_page(output);
        }
 
        kfree(bh);
index af0b738025929b1c18ac884734131ea5e843e40c..1cb70a0b216844a136bf3b47ee6534d22496bb1c 100644 (file)
@@ -56,6 +56,7 @@
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
+#include "page_actor.h"
 
 /*
  * Look-up block in cache, and increment usage count.  If not in cache, read
@@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
                        entry->error = 0;
                        spin_unlock(&cache->lock);
 
-                       entry->length = squashfs_read_data(sb, entry->data,
-                               block, length, &entry->next_index,
-                               cache->block_size, cache->pages);
+                       entry->length = squashfs_read_data(sb, block, length,
+                               &entry->next_index, entry->actor);
 
                        spin_lock(&cache->lock);
 
@@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
                                kfree(cache->entry[i].data[j]);
                        kfree(cache->entry[i].data);
                }
+               kfree(cache->entry[i].actor);
        }
 
        kfree(cache->entry);
@@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
                                goto cleanup;
                        }
                }
+
+               entry->actor = squashfs_page_actor_init(entry->data,
+                                               cache->pages, 0);
+               if (entry->actor == NULL) {
+                       ERROR("Failed to allocate %s cache entry\n", name);
+                       goto cleanup;
+               }
        }
 
        return cache;
@@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
        int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        int i, res;
        void *table, *buffer, **data;
+       struct squashfs_page_actor *actor;
 
        table = buffer = kmalloc(length, GFP_KERNEL);
        if (table == NULL)
@@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
                goto failed;
        }
 
+       actor = squashfs_page_actor_init(data, pages, length);
+       if (actor == NULL) {
+               res = -ENOMEM;
+               goto failed2;
+       }
+
        for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
                data[i] = buffer;
 
-       res = squashfs_read_data(sb, data, block, length |
-               SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
+       res = squashfs_read_data(sb, block, length |
+               SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
 
        kfree(data);
+       kfree(actor);
 
        if (res < 0)
                goto failed;
 
        return table;
 
+failed2:
+       kfree(data);
 failed:
        kfree(table);
        return ERR_PTR(res);
index 3f6271d86abc48ba3132ccfdd5668e118799e249..ac22fe73b0adc241449c35e58faf535d510bb3ec 100644 (file)
@@ -30,6 +30,7 @@
 #include "squashfs_fs_sb.h"
 #include "decompressor.h"
 #include "squashfs.h"
+#include "page_actor.h"
 
 /*
  * This file (and decompressor.h) implements a decompressor framework for
  */
 
 static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
-       NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
+       NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
 };
 
 #ifndef CONFIG_SQUASHFS_LZO
 static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
-       NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
+       NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
 };
 #endif
 
 #ifndef CONFIG_SQUASHFS_XZ
 static const struct squashfs_decompressor squashfs_xz_comp_ops = {
-       NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+       NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
 };
 #endif
 
 #ifndef CONFIG_SQUASHFS_ZLIB
 static const struct squashfs_decompressor squashfs_zlib_comp_ops = {
-       NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
+       NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
 };
 #endif
 
 static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
-       NULL, NULL, NULL, 0, "unknown", 0
+       NULL, NULL, NULL, NULL, 0, "unknown", 0
 };
 
 static const struct squashfs_decompressor *decompressor[] = {
@@ -83,10 +84,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
 }
 
 
-void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
+static void *get_comp_opts(struct super_block *sb, unsigned short flags)
 {
        struct squashfs_sb_info *msblk = sb->s_fs_info;
-       void *strm, *buffer = NULL;
+       void *buffer = NULL, *comp_opts;
+       struct squashfs_page_actor *actor = NULL;
        int length = 0;
 
        /*
@@ -94,23 +96,46 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
         */
        if (SQUASHFS_COMP_OPTS(flags)) {
                buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
-               if (buffer == NULL)
-                       return ERR_PTR(-ENOMEM);
+               if (buffer == NULL) {
+                       comp_opts = ERR_PTR(-ENOMEM);
+                       goto out;
+               }
+
+               actor = squashfs_page_actor_init(&buffer, 1, 0);
+               if (actor == NULL) {
+                       comp_opts = ERR_PTR(-ENOMEM);
+                       goto out;
+               }
 
-               length = squashfs_read_data(sb, &buffer,
-                       sizeof(struct squashfs_super_block), 0, NULL,
-                       PAGE_CACHE_SIZE, 1);
+               length = squashfs_read_data(sb,
+                       sizeof(struct squashfs_super_block), 0, NULL, actor);
 
                if (length < 0) {
-                       strm = ERR_PTR(length);
-                       goto finished;
+                       comp_opts = ERR_PTR(length);
+                       goto out;
                }
        }
 
-       strm = msblk->decompressor->init(msblk, buffer, length);
+       comp_opts = squashfs_comp_opts(msblk, buffer, length);
 
-finished:
+out:
+       kfree(actor);
        kfree(buffer);
+       return comp_opts;
+}
+
+
+void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags)
+{
+       struct squashfs_sb_info *msblk = sb->s_fs_info;
+       void *stream, *comp_opts = get_comp_opts(sb, flags);
+
+       if (IS_ERR(comp_opts))
+               return comp_opts;
+
+       stream = squashfs_decompressor_create(msblk, comp_opts);
+       if (IS_ERR(stream))
+               kfree(comp_opts);
 
-       return strm;
+       return stream;
 }
index 330073e29029950238da75d90fbd86a9dc8c881a..af0985321808e6e92b771b7d3ea63442e5fc8581 100644 (file)
  */
 
 struct squashfs_decompressor {
-       void    *(*init)(struct squashfs_sb_info *, void *, int);
+       void    *(*init)(struct squashfs_sb_info *, void *);
+       void    *(*comp_opts)(struct squashfs_sb_info *, void *, int);
        void    (*free)(void *);
-       int     (*decompress)(struct squashfs_sb_info *, void **,
-               struct buffer_head **, int, int, int, int, int);
+       int     (*decompress)(struct squashfs_sb_info *, void *,
+               struct buffer_head **, int, int, int,
+               struct squashfs_page_actor *);
        int     id;
        char    *name;
        int     supported;
 };
 
-static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
-       void *s)
+static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk,
+                                                       void *buff, int length)
 {
-       if (msblk->decompressor)
-               msblk->decompressor->free(s);
-}
-
-static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
-       void **buffer, struct buffer_head **bh, int b, int offset, int length,
-       int srclength, int pages)
-{
-       return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
-               length, srclength, pages);
+       return msblk->decompressor->comp_opts ?
+               msblk->decompressor->comp_opts(msblk, buff, length) : NULL;
 }
 
 #ifdef CONFIG_SQUASHFS_XZ
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c
new file mode 100644 (file)
index 0000000..d6008a6
--- /dev/null
@@ -0,0 +1,198 @@
+/*
+ *  Copyright (c) 2013
+ *  Minchan Kim <minchan@kernel.org>
+ *
+ *  This work is licensed under the terms of the GNU GPL, version 2. See
+ *  the COPYING file in the top-level directory.
+ */
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/cpumask.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression in the
+ * decompressor framework
+ */
+
+
+/*
+ * The limit is twice the number of online CPUs because a CPU can
+ * request new I/O while it is still waiting for a previous request.
+ */
+#define MAX_DECOMPRESSOR       (num_online_cpus() * 2)
+
+
+int squashfs_max_decompressors(void)
+{
+       return MAX_DECOMPRESSOR;
+}
+
+
+struct squashfs_stream {
+       void                    *comp_opts;
+       struct list_head        strm_list;
+       struct mutex            mutex;
+       int                     avail_decomp;
+       wait_queue_head_t       wait;
+};
+
+
+struct decomp_stream {
+       void *stream;
+       struct list_head list;
+};
+
+
+static void put_decomp_stream(struct decomp_stream *decomp_strm,
+                               struct squashfs_stream *stream)
+{
+       mutex_lock(&stream->mutex);
+       list_add(&decomp_strm->list, &stream->strm_list);
+       mutex_unlock(&stream->mutex);
+       wake_up(&stream->wait);
+}
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+                               void *comp_opts)
+{
+       struct squashfs_stream *stream;
+       struct decomp_stream *decomp_strm = NULL;
+       int err = -ENOMEM;
+
+       stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+       if (!stream)
+               goto out;
+
+       stream->comp_opts = comp_opts;
+       mutex_init(&stream->mutex);
+       INIT_LIST_HEAD(&stream->strm_list);
+       init_waitqueue_head(&stream->wait);
+
+       /*
+        * Always create one decompressor up front as the default, so
+        * that if allocating further decompressors dynamically fails
+        * we can always fall back to the default one and the file
+        * system keeps working.
+        */
+       decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+       if (!decomp_strm)
+               goto out;
+
+       decomp_strm->stream = msblk->decompressor->init(msblk,
+                                               stream->comp_opts);
+       if (IS_ERR(decomp_strm->stream)) {
+               err = PTR_ERR(decomp_strm->stream);
+               goto out;
+       }
+
+       list_add(&decomp_strm->list, &stream->strm_list);
+       stream->avail_decomp = 1;
+       return stream;
+
+out:
+       kfree(decomp_strm);
+       kfree(stream);
+       return ERR_PTR(err);
+}
+
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+       struct squashfs_stream *stream = msblk->stream;
+       if (stream) {
+               struct decomp_stream *decomp_strm;
+
+               while (!list_empty(&stream->strm_list)) {
+                       decomp_strm = list_entry(stream->strm_list.prev,
+                                               struct decomp_stream, list);
+                       list_del(&decomp_strm->list);
+                       msblk->decompressor->free(decomp_strm->stream);
+                       kfree(decomp_strm);
+                       stream->avail_decomp--;
+               }
+               WARN_ON(stream->avail_decomp);
+               kfree(stream->comp_opts);
+               kfree(stream);
+       }
+}
+
+
+static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
+                                       struct squashfs_stream *stream)
+{
+       struct decomp_stream *decomp_strm;
+
+       while (1) {
+               mutex_lock(&stream->mutex);
+
+               /* There is an available decomp_stream */
+               if (!list_empty(&stream->strm_list)) {
+                       decomp_strm = list_entry(stream->strm_list.prev,
+                               struct decomp_stream, list);
+                       list_del(&decomp_strm->list);
+                       mutex_unlock(&stream->mutex);
+                       break;
+               }
+
+               /*
+                * If no decomp is available and the limit is already reached,
+                * let's wait for another user to release one.
+                */
+               if (stream->avail_decomp >= MAX_DECOMPRESSOR)
+                       goto wait;
+
+               /* Let's allocate new decomp */
+               decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+               if (!decomp_strm)
+                       goto wait;
+
+               decomp_strm->stream = msblk->decompressor->init(msblk,
+                                               stream->comp_opts);
+               if (IS_ERR(decomp_strm->stream)) {
+                       kfree(decomp_strm);
+                       goto wait;
+               }
+
+               stream->avail_decomp++;
+               WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR);
+
+               mutex_unlock(&stream->mutex);
+               break;
+wait:
+               /*
+                * If system memory is tight, let's wait for others to
+                * release a decompressor instead of pressuring the VM,
+                * because that could cause page cache thrashing.
+                */
+               mutex_unlock(&stream->mutex);
+               wait_event(stream->wait,
+                       !list_empty(&stream->strm_list));
+       }
+
+       return decomp_strm;
+}
+
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+       int b, int offset, int length, struct squashfs_page_actor *output)
+{
+       int res;
+       struct squashfs_stream *stream = msblk->stream;
+       struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream);
+       res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
+               bh, b, offset, length, output);
+       put_decomp_stream(decomp_stream, stream);
+       if (res < 0)
+               ERROR("%s decompression failed, data probably corrupt\n",
+                       msblk->decompressor->name);
+       return res;
+}
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
new file mode 100644 (file)
index 0000000..23a9c28
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/percpu.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression using percpu
+ * variables, one thread per cpu core.
+ */
+
+struct squashfs_stream {
+       void            *stream;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+                                               void *comp_opts)
+{
+       struct squashfs_stream *stream;
+       struct squashfs_stream __percpu *percpu;
+       int err, cpu;
+
+       percpu = alloc_percpu(struct squashfs_stream);
+       if (percpu == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       for_each_possible_cpu(cpu) {
+               stream = per_cpu_ptr(percpu, cpu);
+               stream->stream = msblk->decompressor->init(msblk, comp_opts);
+               if (IS_ERR(stream->stream)) {
+                       err = PTR_ERR(stream->stream);
+                       goto out;
+               }
+       }
+
+       kfree(comp_opts);
+       return (__force void *) percpu;
+
+out:
+       for_each_possible_cpu(cpu) {
+               stream = per_cpu_ptr(percpu, cpu);
+               if (!IS_ERR_OR_NULL(stream->stream))
+                       msblk->decompressor->free(stream->stream);
+       }
+       free_percpu(percpu);
+       return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+       struct squashfs_stream __percpu *percpu =
+                       (struct squashfs_stream __percpu *) msblk->stream;
+       struct squashfs_stream *stream;
+       int cpu;
+
+       if (msblk->stream) {
+               for_each_possible_cpu(cpu) {
+                       stream = per_cpu_ptr(percpu, cpu);
+                       msblk->decompressor->free(stream->stream);
+               }
+               free_percpu(percpu);
+       }
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+       int b, int offset, int length, struct squashfs_page_actor *output)
+{
+       struct squashfs_stream __percpu *percpu =
+                       (struct squashfs_stream __percpu *) msblk->stream;
+       struct squashfs_stream *stream = get_cpu_ptr(percpu);
+       int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+               offset, length, output);
+       put_cpu_ptr(stream);
+
+       if (res < 0)
+               ERROR("%s decompression failed, data probably corrupt\n",
+                       msblk->decompressor->name);
+
+       return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+       return num_possible_cpus();
+}
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c
new file mode 100644 (file)
index 0000000..a6c7592
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements single-threaded decompression in the
+ * decompressor framework
+ */
+
+struct squashfs_stream {
+       void            *stream;
+       struct mutex    mutex;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+                                               void *comp_opts)
+{
+       struct squashfs_stream *stream;
+       int err = -ENOMEM;
+
+       stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+       if (stream == NULL)
+               goto out;
+
+       stream->stream = msblk->decompressor->init(msblk, comp_opts);
+       if (IS_ERR(stream->stream)) {
+               err = PTR_ERR(stream->stream);
+               goto out;
+       }
+
+       kfree(comp_opts);
+       mutex_init(&stream->mutex);
+       return stream;
+
+out:
+       kfree(stream);
+       return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+       struct squashfs_stream *stream = msblk->stream;
+
+       if (stream) {
+               msblk->decompressor->free(stream->stream);
+               kfree(stream);
+       }
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+       int b, int offset, int length, struct squashfs_page_actor *output)
+{
+       int res;
+       struct squashfs_stream *stream = msblk->stream;
+
+       mutex_lock(&stream->mutex);
+       res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+               offset, length, output);
+       mutex_unlock(&stream->mutex);
+
+       if (res < 0)
+               ERROR("%s decompression failed, data probably corrupt\n",
+                       msblk->decompressor->name);
+
+       return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+       return 1;
+}
index 8ca62c28fe1249fd40d7b8e01612b1dedc5ca704..e5c9689062ba81fff5db08f50c2d39d53c4508d9 100644 (file)
@@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
        return le32_to_cpu(size);
 }
 
-
-static int squashfs_readpage(struct file *file, struct page *page)
+/* Copy data into page cache  */
+void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
+       int bytes, int offset)
 {
        struct inode *inode = page->mapping->host;
        struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-       int bytes, i, offset = 0, sparse = 0;
-       struct squashfs_cache_entry *buffer = NULL;
        void *pageaddr;
-
-       int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
-       int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
-       int start_index = page->index & ~mask;
-       int end_index = start_index | mask;
-       int file_end = i_size_read(inode) >> msblk->block_log;
-
-       TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
-                               page->index, squashfs_i(inode)->start);
-
-       if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
-                                       PAGE_CACHE_SHIFT))
-               goto out;
-
-       if (index < file_end || squashfs_i(inode)->fragment_block ==
-                                       SQUASHFS_INVALID_BLK) {
-               /*
-                * Reading a datablock from disk.  Need to read block list
-                * to get location and block size.
-                */
-               u64 block = 0;
-               int bsize = read_blocklist(inode, index, &block);
-               if (bsize < 0)
-                       goto error_out;
-
-               if (bsize == 0) { /* hole */
-                       bytes = index == file_end ?
-                               (i_size_read(inode) & (msblk->block_size - 1)) :
-                                msblk->block_size;
-                       sparse = 1;
-               } else {
-                       /*
-                        * Read and decompress datablock.
-                        */
-                       buffer = squashfs_get_datablock(inode->i_sb,
-                                                               block, bsize);
-                       if (buffer->error) {
-                               ERROR("Unable to read page, block %llx, size %x"
-                                       "\n", block, bsize);
-                               squashfs_cache_put(buffer);
-                               goto error_out;
-                       }
-                       bytes = buffer->length;
-               }
-       } else {
-               /*
-                * Datablock is stored inside a fragment (tail-end packed
-                * block).
-                */
-               buffer = squashfs_get_fragment(inode->i_sb,
-                               squashfs_i(inode)->fragment_block,
-                               squashfs_i(inode)->fragment_size);
-
-               if (buffer->error) {
-                       ERROR("Unable to read page, block %llx, size %x\n",
-                               squashfs_i(inode)->fragment_block,
-                               squashfs_i(inode)->fragment_size);
-                       squashfs_cache_put(buffer);
-                       goto error_out;
-               }
-               bytes = i_size_read(inode) & (msblk->block_size - 1);
-               offset = squashfs_i(inode)->fragment_offset;
-       }
+       int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+       int start_index = page->index & ~mask, end_index = start_index | mask;
 
        /*
         * Loop copying datablock into pages.  As the datablock likely covers
@@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page)
        for (i = start_index; i <= end_index && bytes > 0; i++,
                        bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
                struct page *push_page;
-               int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE);
+               int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0;
 
                TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
 
@@ -475,11 +413,75 @@ skip_page:
                if (i != page->index)
                        page_cache_release(push_page);
        }
+}
+
+/* Read datablock stored packed inside a fragment (tail-end packed block) */
+static int squashfs_readpage_fragment(struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+       struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
+               squashfs_i(inode)->fragment_block,
+               squashfs_i(inode)->fragment_size);
+       int res = buffer->error;
+
+       if (res)
+               ERROR("Unable to read page, block %llx, size %x\n",
+                       squashfs_i(inode)->fragment_block,
+                       squashfs_i(inode)->fragment_size);
+       else
+               squashfs_copy_cache(page, buffer, i_size_read(inode) &
+                       (msblk->block_size - 1),
+                       squashfs_i(inode)->fragment_offset);
+
+       squashfs_cache_put(buffer);
+       return res;
+}
 
-       if (!sparse)
-               squashfs_cache_put(buffer);
+static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
+{
+       struct inode *inode = page->mapping->host;
+       struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+       int bytes = index == file_end ?
+                       (i_size_read(inode) & (msblk->block_size - 1)) :
+                        msblk->block_size;
 
+       squashfs_copy_cache(page, NULL, bytes, 0);
        return 0;
+}
+
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+       int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+       int file_end = i_size_read(inode) >> msblk->block_log;
+       int res;
+       void *pageaddr;
+
+       TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+                               page->index, squashfs_i(inode)->start);
+
+       if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+                                       PAGE_CACHE_SHIFT))
+               goto out;
+
+       if (index < file_end || squashfs_i(inode)->fragment_block ==
+                                       SQUASHFS_INVALID_BLK) {
+               u64 block = 0;
+               int bsize = read_blocklist(inode, index, &block);
+               if (bsize < 0)
+                       goto error_out;
+
+               if (bsize == 0)
+                       res = squashfs_readpage_sparse(page, index, file_end);
+               else
+                       res = squashfs_readpage_block(page, block, bsize);
+       } else
+               res = squashfs_readpage_fragment(page);
+
+       if (!res)
+               return 0;
 
 error_out:
        SetPageError(page);
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
new file mode 100644 (file)
index 0000000..f2310d2
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/* Read separately compressed datablock and memcopy into page cache */
+int squashfs_readpage_block(struct page *page, u64 block, int bsize)
+{
+       struct inode *i = page->mapping->host;
+       struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+               block, bsize);
+       int res = buffer->error;
+
+       if (res)
+               ERROR("Unable to read page, block %llx, size %x\n", block,
+                       bsize);
+       else
+               squashfs_copy_cache(page, buffer, buffer->length, 0);
+
+       squashfs_cache_put(buffer);
+       return res;
+}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
new file mode 100644 (file)
index 0000000..2943b2b
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+#include "page_actor.h"
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+       int pages, struct page **page);
+
+/* Read separately compressed datablock directly into page cache */
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
+
+{
+       struct inode *inode = target_page->mapping->host;
+       struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+
+       int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+       int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+       int start_index = target_page->index & ~mask;
+       int end_index = start_index | mask;
+       int i, n, pages, missing_pages, bytes, res = -ENOMEM;
+       struct page **page;
+       struct squashfs_page_actor *actor;
+       void *pageaddr;
+
+       if (end_index > file_end)
+               end_index = file_end;
+
+       pages = end_index - start_index + 1;
+
+       page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+       if (page == NULL)
+               return res;
+
+       /*
+        * Create a "page actor" which will kmap and kunmap the
+        * page cache pages appropriately within the decompressor
+        */
+       actor = squashfs_page_actor_init_special(page, pages, 0);
+       if (actor == NULL)
+               goto out;
+
+       /* Try to grab all the pages covered by the Squashfs block */
+       for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
+               page[i] = (n == target_page->index) ? target_page :
+                       grab_cache_page_nowait(target_page->mapping, n);
+
+               if (page[i] == NULL) {
+                       missing_pages++;
+                       continue;
+               }
+
+               if (PageUptodate(page[i])) {
+                       unlock_page(page[i]);
+                       page_cache_release(page[i]);
+                       page[i] = NULL;
+                       missing_pages++;
+               }
+       }
+
+       if (missing_pages) {
+               /*
+                * Couldn't get one or more pages, this page has either
+                * been VM reclaimed, but others are still in the page cache
+                * and uptodate, or we're racing with another thread in
+                * squashfs_readpage also trying to grab them.  Fall back to
+                * using an intermediate buffer.
+                */
+               res = squashfs_read_cache(target_page, block, bsize, pages,
+                                                               page);
+               goto out;
+       }
+
+       /* Decompress directly into the page cache buffers */
+       res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
+       if (res < 0)
+               goto mark_errored;
+
+       /* Last page may have trailing bytes not filled */
+       bytes = res % PAGE_CACHE_SIZE;
+       if (bytes) {
+               pageaddr = kmap_atomic(page[pages - 1]);
+               memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+               kunmap_atomic(pageaddr);
+       }
+
+       /* Mark pages as uptodate, unlock and release */
+       for (i = 0; i < pages; i++) {
+               flush_dcache_page(page[i]);
+               SetPageUptodate(page[i]);
+               unlock_page(page[i]);
+               if (page[i] != target_page)
+                       page_cache_release(page[i]);
+       }
+
+       kfree(actor);
+       kfree(page);
+
+       return 0;
+
+mark_errored:
+       /* Decompression failed, mark pages as errored.  Target_page is
+        * dealt with by the caller
+        */
+       for (i = 0; i < pages; i++) {
+               if (page[i] == target_page)
+                       continue;
+               flush_dcache_page(page[i]);
+               SetPageError(page[i]);
+               unlock_page(page[i]);
+               page_cache_release(page[i]);
+       }
+
+out:
+       kfree(actor);
+       kfree(page);
+       return res;
+}
+
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+       int pages, struct page **page)
+{
+       struct inode *i = target_page->mapping->host;
+       struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+                                                block, bsize);
+       int bytes = buffer->length, res = buffer->error, n, offset = 0;
+       void *pageaddr;
+
+       if (res) {
+               ERROR("Unable to read page, block %llx, size %x\n", block,
+                       bsize);
+               goto out;
+       }
+
+       for (n = 0; n < pages && bytes > 0; n++,
+                       bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+               int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+
+               if (page[n] == NULL)
+                       continue;
+
+               pageaddr = kmap_atomic(page[n]);
+               squashfs_copy_data(pageaddr, buffer, offset, avail);
+               memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+               kunmap_atomic(pageaddr);
+               flush_dcache_page(page[n]);
+               SetPageUptodate(page[n]);
+               unlock_page(page[n]);
+               if (page[n] != target_page)
+                       page_cache_release(page[n]);
+       }
+
+out:
+       squashfs_cache_put(buffer);
+       return res;
+}
index 00f4dfc5f0884cb6d77920130883521b04635784..244b9fbfff7b299195585320328cfc7540e6887e 100644 (file)
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
 #include "decompressor.h"
+#include "page_actor.h"
 
 struct squashfs_lzo {
        void    *input;
        void    *output;
 };
 
-static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
+static void *lzo_init(struct squashfs_sb_info *msblk, void *buff)
 {
        int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
 
@@ -74,22 +75,16 @@ static void lzo_free(void *strm)
 }
 
 
-static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
-       struct buffer_head **bh, int b, int offset, int length, int srclength,
-       int pages)
+static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
+       struct buffer_head **bh, int b, int offset, int length,
+       struct squashfs_page_actor *output)
 {
-       struct squashfs_lzo *stream = msblk->stream;
-       void *buff = stream->input;
+       struct squashfs_lzo *stream = strm;
+       void *buff = stream->input, *data;
        int avail, i, bytes = length, res;
-       size_t out_len = srclength;
-
-       mutex_lock(&msblk->read_data_mutex);
+       size_t out_len = output->length;
 
        for (i = 0; i < b; i++) {
-               wait_on_buffer(bh[i]);
-               if (!buffer_uptodate(bh[i]))
-                       goto block_release;
-
                avail = min(bytes, msblk->devblksize - offset);
                memcpy(buff, bh[i]->b_data + offset, avail);
                buff += avail;
@@ -104,24 +99,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
                goto failed;
 
        res = bytes = (int)out_len;
-       for (i = 0, buff = stream->output; bytes && i < pages; i++) {
-               avail = min_t(int, bytes, PAGE_CACHE_SIZE);
-               memcpy(buffer[i], buff, avail);
-               buff += avail;
-               bytes -= avail;
+       data = squashfs_first_page(output);
+       buff = stream->output;
+       while (data) {
+               if (bytes <= PAGE_CACHE_SIZE) {
+                       memcpy(data, buff, bytes);
+                       break;
+               } else {
+                       memcpy(data, buff, PAGE_CACHE_SIZE);
+                       buff += PAGE_CACHE_SIZE;
+                       bytes -= PAGE_CACHE_SIZE;
+                       data = squashfs_next_page(output);
+               }
        }
+       squashfs_finish_page(output);
 
-       mutex_unlock(&msblk->read_data_mutex);
        return res;
 
-block_release:
-       for (; i < b; i++)
-               put_bh(bh[i]);
-
 failed:
-       mutex_unlock(&msblk->read_data_mutex);
-
-       ERROR("lzo decompression failed, data probably corrupt\n");
        return -EIO;
 }
 
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
new file mode 100644 (file)
index 0000000..5a1c11f
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include "page_actor.h"
+
+/*
+ * This file contains implementations of page_actor for decompressing into
+ * an intermediate buffer, and for decompressing directly into the
+ * page cache.
+ *
+ * Calling code should avoid sleeping between calls to squashfs_first_page()
+ * and squashfs_finish_page().
+ */
+
+/* Implementation of page_actor for decompressing into intermediate buffer */
+static void *cache_first_page(struct squashfs_page_actor *actor)
+{
+       actor->next_page = 1;
+       return actor->buffer[0];
+}
+
+static void *cache_next_page(struct squashfs_page_actor *actor)
+{
+       if (actor->next_page == actor->pages)
+               return NULL;
+
+       return actor->buffer[actor->next_page++];
+}
+
+static void cache_finish_page(struct squashfs_page_actor *actor)
+{
+       /* empty */
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
+       int pages, int length)
+{
+       struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+       if (actor == NULL)
+               return NULL;
+
+       actor->length = length ? : pages * PAGE_CACHE_SIZE;
+       actor->buffer = buffer;
+       actor->pages = pages;
+       actor->next_page = 0;
+       actor->squashfs_first_page = cache_first_page;
+       actor->squashfs_next_page = cache_next_page;
+       actor->squashfs_finish_page = cache_finish_page;
+       return actor;
+}
+
+/* Implementation of page_actor for decompressing directly into page cache. */
+static void *direct_first_page(struct squashfs_page_actor *actor)
+{
+       actor->next_page = 1;
+       return actor->pageaddr = kmap_atomic(actor->page[0]);
+}
+
+static void *direct_next_page(struct squashfs_page_actor *actor)
+{
+       if (actor->pageaddr)
+               kunmap_atomic(actor->pageaddr);
+
+       return actor->pageaddr = actor->next_page == actor->pages ? NULL :
+               kmap_atomic(actor->page[actor->next_page++]);
+}
+
+static void direct_finish_page(struct squashfs_page_actor *actor)
+{
+       if (actor->pageaddr)
+               kunmap_atomic(actor->pageaddr);
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
+       int pages, int length)
+{
+       struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+       if (actor == NULL)
+               return NULL;
+
+       actor->length = length ? : pages * PAGE_CACHE_SIZE;
+       actor->page = page;
+       actor->pages = pages;
+       actor->next_page = 0;
+       actor->pageaddr = NULL;
+       actor->squashfs_first_page = direct_first_page;
+       actor->squashfs_next_page = direct_next_page;
+       actor->squashfs_finish_page = direct_finish_page;
+       return actor;
+}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
new file mode 100644 (file)
index 0000000..26dd820
--- /dev/null
@@ -0,0 +1,81 @@
+#ifndef PAGE_ACTOR_H
+#define PAGE_ACTOR_H
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef CONFIG_SQUASHFS_FILE_DIRECT
+struct squashfs_page_actor {
+       void    **page;
+       int     pages;
+       int     length;
+       int     next_page;
+};
+
+static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
+       int pages, int length)
+{
+       struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+       if (actor == NULL)
+               return NULL;
+
+       actor->length = length ? : pages * PAGE_CACHE_SIZE;
+       actor->page = page;
+       actor->pages = pages;
+       actor->next_page = 0;
+       return actor;
+}
+
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+       actor->next_page = 1;
+       return actor->page[0];
+}
+
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+       return actor->next_page == actor->pages ? NULL :
+               actor->page[actor->next_page++];
+}
+
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+       /* empty */
+}
+#else
+struct squashfs_page_actor {
+       union {
+               void            **buffer;
+               struct page     **page;
+       };
+       void    *pageaddr;
+       void    *(*squashfs_first_page)(struct squashfs_page_actor *);
+       void    *(*squashfs_next_page)(struct squashfs_page_actor *);
+       void    (*squashfs_finish_page)(struct squashfs_page_actor *);
+       int     pages;
+       int     length;
+       int     next_page;
+};
+
+extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
+extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
+                                                        **, int, int);
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+       return actor->squashfs_first_page(actor);
+}
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+       return actor->squashfs_next_page(actor);
+}
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+       actor->squashfs_finish_page(actor);
+}
+#endif
+#endif
index d1266516ed08494e93f4dc2ef00fe66be9216cb5..9e1bb79f7e6f09ea793d11d5e627483bddc048ea 100644 (file)
@@ -28,8 +28,8 @@
 #define WARNING(s, args...)    pr_warning("SQUASHFS: "s, ## args)
 
 /* block.c */
-extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
-                               int, int);
+extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
+                               struct squashfs_page_actor *);
 
 /* cache.c */
 extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int);
 
 /* decompressor.c */
 extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
-extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
+extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
+
+/* decompressor_xxx.c */
+extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
+extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
+extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **,
+       int, int, int, struct squashfs_page_actor *);
+extern int squashfs_max_decompressors(void);
 
 /* export.c */
 extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
@@ -59,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *);
 extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
                                u64, u64, unsigned int);
 
+/* file.c */
+void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
+                               int);
+
+/* file_xxx.c */
+extern int squashfs_readpage_block(struct page *, u64, int);
+
 /* id.c */
 extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
 extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64,
index 52934a22f29665a00f082b24aa57ce915e875913..1da565cb50c3d0f1e652671dd7a81577b598ca2f 100644 (file)
@@ -50,6 +50,7 @@ struct squashfs_cache_entry {
        wait_queue_head_t       wait_queue;
        struct squashfs_cache   *cache;
        void                    **data;
+       struct squashfs_page_actor      *actor;
 };
 
 struct squashfs_sb_info {
@@ -63,10 +64,9 @@ struct squashfs_sb_info {
        __le64                                  *id_table;
        __le64                                  *fragment_index;
        __le64                                  *xattr_id_table;
-       struct mutex                            read_data_mutex;
        struct mutex                            meta_index_mutex;
        struct meta_index                       *meta_index;
-       void                                    *stream;
+       struct squashfs_stream                  *stream;
        __le64                                  *inode_lookup_table;
        u64                                     inode_table;
        u64                                     directory_table;
index 60553a9053ca82b7264862e8168b61df0ce7de89..202df6312d4e8517a63bb9ff10b385738e54ab6e 100644 (file)
@@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
        msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
        msblk->devblksize_log2 = ffz(~msblk->devblksize);
 
-       mutex_init(&msblk->read_data_mutex);
        mutex_init(&msblk->meta_index_mutex);
 
        /*
@@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
                goto failed_mount;
 
        /* Allocate read_page block */
-       msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size);
+       msblk->read_page = squashfs_cache_init("data",
+               squashfs_max_decompressors(), msblk->block_size);
        if (msblk->read_page == NULL) {
                ERROR("Failed to allocate read_page block\n");
                goto failed_mount;
        }
 
-       msblk->stream = squashfs_decompressor_init(sb, flags);
+       msblk->stream = squashfs_decompressor_setup(sb, flags);
        if (IS_ERR(msblk->stream)) {
                err = PTR_ERR(msblk->stream);
                msblk->stream = NULL;
@@ -336,7 +336,7 @@ failed_mount:
        squashfs_cache_delete(msblk->block_cache);
        squashfs_cache_delete(msblk->fragment_cache);
        squashfs_cache_delete(msblk->read_page);
-       squashfs_decompressor_free(msblk, msblk->stream);
+       squashfs_decompressor_destroy(msblk);
        kfree(msblk->inode_lookup_table);
        kfree(msblk->fragment_index);
        kfree(msblk->id_table);
@@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb)
                squashfs_cache_delete(sbi->block_cache);
                squashfs_cache_delete(sbi->fragment_cache);
                squashfs_cache_delete(sbi->read_page);
-               squashfs_decompressor_free(sbi, sbi->stream);
+               squashfs_decompressor_destroy(sbi);
                kfree(sbi->id_table);
                kfree(sbi->fragment_index);
                kfree(sbi->meta_index);
index 1760b7d108f66a55614102c43713ef315592374e..c609624e4b8a8cf88152310337c2533be7bec5cb 100644 (file)
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
 #include "decompressor.h"
+#include "page_actor.h"
 
 struct squashfs_xz {
        struct xz_dec *state;
        struct xz_buf buf;
 };
 
-struct comp_opts {
+struct disk_comp_opts {
        __le32 dictionary_size;
        __le32 flags;
 };
 
-static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
-       int len)
+struct comp_opts {
+       int dict_size;
+};
+
+static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
+       void *buff, int len)
 {
-       struct comp_opts *comp_opts = buff;
-       struct squashfs_xz *stream;
-       int dict_size = msblk->block_size;
-       int err, n;
+       struct disk_comp_opts *comp_opts = buff;
+       struct comp_opts *opts;
+       int err = 0, n;
+
+       opts = kmalloc(sizeof(*opts), GFP_KERNEL);
+       if (opts == NULL) {
+               err = -ENOMEM;
+               goto out2;
+       }
 
        if (comp_opts) {
                /* check compressor options are the expected length */
                if (len < sizeof(*comp_opts)) {
                        err = -EIO;
-                       goto failed;
+                       goto out;
                }
 
-               dict_size = le32_to_cpu(comp_opts->dictionary_size);
+               opts->dict_size = le32_to_cpu(comp_opts->dictionary_size);
 
                /* the dictionary size should be 2^n or 2^n+2^(n+1) */
-               n = ffs(dict_size) - 1;
-               if (dict_size != (1 << n) && dict_size != (1 << n) +
+               n = ffs(opts->dict_size) - 1;
+               if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) +
                                                (1 << (n + 1))) {
                        err = -EIO;
-                       goto failed;
+                       goto out;
                }
-       }
+       } else
+               /* use defaults */
+               opts->dict_size = max_t(int, msblk->block_size,
+                                                       SQUASHFS_METADATA_SIZE);
+
+       return opts;
+
+out:
+       kfree(opts);
+out2:
+       return ERR_PTR(err);
+}
+
 
-       dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
+static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff)
+{
+       struct comp_opts *comp_opts = buff;
+       struct squashfs_xz *stream;
+       int err;
 
        stream = kmalloc(sizeof(*stream), GFP_KERNEL);
        if (stream == NULL) {
@@ -77,7 +103,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
                goto failed;
        }
 
-       stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
+       stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size);
        if (stream->state == NULL) {
                kfree(stream);
                err = -ENOMEM;
@@ -103,42 +129,37 @@ static void squashfs_xz_free(void *strm)
 }
 
 
-static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
-       struct buffer_head **bh, int b, int offset, int length, int srclength,
-       int pages)
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
+       struct buffer_head **bh, int b, int offset, int length,
+       struct squashfs_page_actor *output)
 {
        enum xz_ret xz_err;
-       int avail, total = 0, k = 0, page = 0;
-       struct squashfs_xz *stream = msblk->stream;
-
-       mutex_lock(&msblk->read_data_mutex);
+       int avail, total = 0, k = 0;
+       struct squashfs_xz *stream = strm;
 
        xz_dec_reset(stream->state);
        stream->buf.in_pos = 0;
        stream->buf.in_size = 0;
        stream->buf.out_pos = 0;
        stream->buf.out_size = PAGE_CACHE_SIZE;
-       stream->buf.out = buffer[page++];
+       stream->buf.out = squashfs_first_page(output);
 
        do {
                if (stream->buf.in_pos == stream->buf.in_size && k < b) {
                        avail = min(length, msblk->devblksize - offset);
                        length -= avail;
-                       wait_on_buffer(bh[k]);
-                       if (!buffer_uptodate(bh[k]))
-                               goto release_mutex;
-
                        stream->buf.in = bh[k]->b_data + offset;
                        stream->buf.in_size = avail;
                        stream->buf.in_pos = 0;
                        offset = 0;
                }
 
-               if (stream->buf.out_pos == stream->buf.out_size
-                                                       && page < pages) {
-                       stream->buf.out = buffer[page++];
-                       stream->buf.out_pos = 0;
-                       total += PAGE_CACHE_SIZE;
+               if (stream->buf.out_pos == stream->buf.out_size) {
+                       stream->buf.out = squashfs_next_page(output);
+                       if (stream->buf.out != NULL) {
+                               stream->buf.out_pos = 0;
+                               total += PAGE_CACHE_SIZE;
+                       }
                }
 
                xz_err = xz_dec_run(stream->state, &stream->buf);
@@ -147,23 +168,14 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
                        put_bh(bh[k++]);
        } while (xz_err == XZ_OK);
 
-       if (xz_err != XZ_STREAM_END) {
-               ERROR("xz_dec_run error, data probably corrupt\n");
-               goto release_mutex;
-       }
-
-       if (k < b) {
-               ERROR("xz_uncompress error, input remaining\n");
-               goto release_mutex;
-       }
+       squashfs_finish_page(output);
 
-       total += stream->buf.out_pos;
-       mutex_unlock(&msblk->read_data_mutex);
-       return total;
+       if (xz_err != XZ_STREAM_END || k < b)
+               goto out;
 
-release_mutex:
-       mutex_unlock(&msblk->read_data_mutex);
+       return total + stream->buf.out_pos;
 
+out:
        for (; k < b; k++)
                put_bh(bh[k]);
 
@@ -172,6 +184,7 @@ release_mutex:
 
 const struct squashfs_decompressor squashfs_xz_comp_ops = {
        .init = squashfs_xz_init,
+       .comp_opts = squashfs_xz_comp_opts,
        .free = squashfs_xz_free,
        .decompress = squashfs_xz_uncompress,
        .id = XZ_COMPRESSION,
index 55d918fd2d862605beb85dae54c3a177b776381d..8727caba6882209ad62102c0148a09b64421626a 100644 (file)
@@ -32,8 +32,9 @@
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
 #include "decompressor.h"
+#include "page_actor.h"
 
-static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
+static void *zlib_init(struct squashfs_sb_info *dummy, void *buff)
 {
        z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
        if (stream == NULL)
@@ -61,44 +62,37 @@ static void zlib_free(void *strm)
 }
 
 
-static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
-       struct buffer_head **bh, int b, int offset, int length, int srclength,
-       int pages)
+static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
+       struct buffer_head **bh, int b, int offset, int length,
+       struct squashfs_page_actor *output)
 {
-       int zlib_err, zlib_init = 0;
-       int k = 0, page = 0;
-       z_stream *stream = msblk->stream;
-
-       mutex_lock(&msblk->read_data_mutex);
+       int zlib_err, zlib_init = 0, k = 0;
+       z_stream *stream = strm;
 
-       stream->avail_out = 0;
+       stream->avail_out = PAGE_CACHE_SIZE;
+       stream->next_out = squashfs_first_page(output);
        stream->avail_in = 0;
 
        do {
                if (stream->avail_in == 0 && k < b) {
                        int avail = min(length, msblk->devblksize - offset);
                        length -= avail;
-                       wait_on_buffer(bh[k]);
-                       if (!buffer_uptodate(bh[k]))
-                               goto release_mutex;
-
                        stream->next_in = bh[k]->b_data + offset;
                        stream->avail_in = avail;
                        offset = 0;
                }
 
-               if (stream->avail_out == 0 && page < pages) {
-                       stream->next_out = buffer[page++];
-                       stream->avail_out = PAGE_CACHE_SIZE;
+               if (stream->avail_out == 0) {
+                       stream->next_out = squashfs_next_page(output);
+                       if (stream->next_out != NULL)
+                               stream->avail_out = PAGE_CACHE_SIZE;
                }
 
                if (!zlib_init) {
                        zlib_err = zlib_inflateInit(stream);
                        if (zlib_err != Z_OK) {
-                               ERROR("zlib_inflateInit returned unexpected "
-                                       "result 0x%x, srclength %d\n",
-                                       zlib_err, srclength);
-                               goto release_mutex;
+                               squashfs_finish_page(output);
+                               goto out;
                        }
                        zlib_init = 1;
                }
@@ -109,29 +103,21 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
                        put_bh(bh[k++]);
        } while (zlib_err == Z_OK);
 
-       if (zlib_err != Z_STREAM_END) {
-               ERROR("zlib_inflate error, data probably corrupt\n");
-               goto release_mutex;
-       }
+       squashfs_finish_page(output);
 
-       zlib_err = zlib_inflateEnd(stream);
-       if (zlib_err != Z_OK) {
-               ERROR("zlib_inflate error, data probably corrupt\n");
-               goto release_mutex;
-       }
+       if (zlib_err != Z_STREAM_END)
+               goto out;
 
-       if (k < b) {
-               ERROR("zlib_uncompress error, data remaining\n");
-               goto release_mutex;
-       }
+       zlib_err = zlib_inflateEnd(stream);
+       if (zlib_err != Z_OK)
+               goto out;
 
-       length = stream->total_out;
-       mutex_unlock(&msblk->read_data_mutex);
-       return length;
+       if (k < b)
+               goto out;
 
-release_mutex:
-       mutex_unlock(&msblk->read_data_mutex);
+       return stream->total_out;
 
+out:
        for (; k < b; k++)
                put_bh(bh[k]);
 
index 1c02da8bb7df5a0bf5729cd5375a0266731e545c..3ef11b22e7505c380feb6597113d5b150f7b1afb 100644 (file)
@@ -1137,6 +1137,7 @@ xfs_bmap_add_attrfork(
        int                     committed;      /* xaction was committed */
        int                     logflags;       /* logging flags */
        int                     error;          /* error return value */
+       int                     cancel_flags = 0;
 
        ASSERT(XFS_IFORK_Q(ip) == 0);
 
@@ -1147,19 +1148,20 @@ xfs_bmap_add_attrfork(
        if (rsvd)
                tp->t_flags |= XFS_TRANS_RESERVE;
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
-       if (error)
-               goto error0;
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
                        XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
                        XFS_QMOPT_RES_REGBLKS);
-       if (error) {
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
-               return error;
-       }
+       if (error)
+               goto trans_cancel;
+       cancel_flags |= XFS_TRANS_ABORT;
        if (XFS_IFORK_Q(ip))
-               goto error1;
+               goto trans_cancel;
        if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
                /*
                 * For inodes coming from pre-6.2 filesystems.
@@ -1169,7 +1171,7 @@ xfs_bmap_add_attrfork(
        }
        ASSERT(ip->i_d.di_anextents == 0);
 
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, 0);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
        switch (ip->i_d.di_format) {
@@ -1191,7 +1193,7 @@ xfs_bmap_add_attrfork(
        default:
                ASSERT(0);
                error = XFS_ERROR(EINVAL);
-               goto error1;
+               goto trans_cancel;
        }
 
        ASSERT(ip->i_afp == NULL);
@@ -1219,7 +1221,7 @@ xfs_bmap_add_attrfork(
        if (logflags)
                xfs_trans_log_inode(tp, ip, logflags);
        if (error)
-               goto error2;
+               goto bmap_cancel;
        if (!xfs_sb_version_hasattr(&mp->m_sb) ||
           (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
                __int64_t sbfields = 0;
@@ -1242,14 +1244,16 @@ xfs_bmap_add_attrfork(
 
        error = xfs_bmap_finish(&tp, &flist, &committed);
        if (error)
-               goto error2;
-       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-error2:
+               goto bmap_cancel;
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+
+bmap_cancel:
        xfs_bmap_cancel(&flist);
-error1:
+trans_cancel:
+       xfs_trans_cancel(tp, cancel_flags);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
-error0:
-       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
        return error;
 }
 
index da88f167af78dbf04df4eb0c1cdcfcad7a5bd699..02df7b408a2623d6a32e7a2bc8285be9348ca646 100644 (file)
@@ -41,6 +41,7 @@
 #include "xfs_fsops.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_dinode.h"
 
 
 #ifdef HAVE_PERCPU_SB
@@ -718,8 +719,22 @@ xfs_mountfs(
         * Set the inode cluster size.
         * This may still be overridden by the file system
         * block size if it is larger than the chosen cluster size.
+        *
+        * For v5 filesystems, scale the cluster size with the inode size to
+        * keep a constant ratio of inodes per cluster buffer, but only if mkfs
+        * has set the inode alignment value appropriately for larger cluster
+        * sizes.
         */
        mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               int     new_size = mp->m_inode_cluster_size;
+
+               new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
+               if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
+                       mp->m_inode_cluster_size = new_size;
+               xfs_info(mp, "Using inode cluster size of %d bytes",
+                        mp->m_inode_cluster_size);
+       }
 
        /*
         * Set inode alignment fields
index 1d8101a10d8eecb1644b203591068baa1a434493..a466c5e5826eed27b489f0018c024362214123c3 100644 (file)
@@ -112,7 +112,7 @@ typedef struct xfs_mount {
        __uint8_t               m_blkbb_log;    /* blocklog - BBSHIFT */
        __uint8_t               m_agno_log;     /* log #ag's */
        __uint8_t               m_agino_log;    /* #bits for agino in inum */
-       __uint16_t              m_inode_cluster_size;/* min inode buf size */
+       uint                    m_inode_cluster_size;/* min inode buf size */
        uint                    m_blockmask;    /* sb_blocksize-1 */
        uint                    m_blockwsize;   /* sb_blocksize in words */
        uint                    m_blockwmask;   /* blockwsize-1 */
index 1bba7f60d94cab1fe153b073b8ca42f24fbd4bfc..50c3f5614288febe4c85fdb7c231887527e80f79 100644 (file)
@@ -111,12 +111,14 @@ xfs_trans_log_inode(
 
        /*
         * First time we log the inode in a transaction, bump the inode change
-        * counter if it is configured for this to occur.
+        * counter if it is configured for this to occur. We don't use
+        * inode_inc_iversion() because there is no need for extra locking around
+        * i_version as we already hold the inode locked exclusively for
+        * metadata modification.
         */
        if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
            IS_I_VERSION(VFS_I(ip))) {
-               inode_inc_iversion(VFS_I(ip));
-               ip->i_d.di_changecount = VFS_I(ip)->i_version;
+               ip->i_d.di_changecount = ++VFS_I(ip)->i_version;
                flags |= XFS_ILOG_CORE;
        }
 
index d53d9f0627a779cacab8adaa5aea71be36f8e41e..2fd59c0dae667b58029bad83b56dfad4958e4e0c 100644 (file)
@@ -385,8 +385,7 @@ xfs_calc_ifree_reservation(
                xfs_calc_inode_res(mp, 1) +
                xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
                xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-               MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
-                   XFS_INODE_CLUSTER_SIZE(mp)) +
+               max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) +
                xfs_calc_buf_res(1, 0) +
                xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
                                 mp->m_in_maxlevels, 0) +
index 89c60b0f640819c7ed93b40774d5b9c3f3cbadfb..7b2de026a4f3db11c730d9d3abfa8b903ba5713f 100644 (file)
@@ -431,9 +431,9 @@ static inline acpi_handle acpi_get_child(acpi_handle handle, u64 addr)
 {
        return acpi_find_child(handle, addr, false);
 }
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr);
 int acpi_is_root_bridge(acpi_handle);
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
-#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev))
 
 int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
 int acpi_disable_wakeup_device_power(struct acpi_device *dev);
diff --git a/include/crypto/hash_info.h b/include/crypto/hash_info.h
new file mode 100644 (file)
index 0000000..e1e5a3e
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Hash Info: Hash algorithms information
+ *
+ * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_HASH_INFO_H
+#define _CRYPTO_HASH_INFO_H
+
+#include <crypto/sha.h>
+#include <crypto/md5.h>
+
+#include <uapi/linux/hash_info.h>
+
+/* not defined in include/crypto/ */
+#define RMD128_DIGEST_SIZE      16
+#define RMD160_DIGEST_SIZE     20
+#define RMD256_DIGEST_SIZE      32
+#define RMD320_DIGEST_SIZE      40
+
+/* not defined in include/crypto/ */
+#define WP512_DIGEST_SIZE      64
+#define WP384_DIGEST_SIZE      48
+#define WP256_DIGEST_SIZE      32
+
+/* not defined in include/crypto/ */
+#define TGR128_DIGEST_SIZE 16
+#define TGR160_DIGEST_SIZE 20
+#define TGR192_DIGEST_SIZE 24
+
+extern const char *const hash_algo_name[HASH_ALGO__LAST];
+extern const int hash_digest_size[HASH_ALGO__LAST];
+
+#endif /* _CRYPTO_HASH_INFO_H */
index f5b0224c99679ed23b8475a75b78992d3ca8c899..fc09732613adbe98985083a0ca8fc0ec5ea334a9 100644 (file)
@@ -15,6 +15,7 @@
 #define _LINUX_PUBLIC_KEY_H
 
 #include <linux/mpi.h>
+#include <crypto/hash_info.h>
 
 enum pkey_algo {
        PKEY_ALGO_DSA,
@@ -22,21 +23,11 @@ enum pkey_algo {
        PKEY_ALGO__LAST
 };
 
-extern const char *const pkey_algo[PKEY_ALGO__LAST];
+extern const char *const pkey_algo_name[PKEY_ALGO__LAST];
+extern const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST];
 
-enum pkey_hash_algo {
-       PKEY_HASH_MD4,
-       PKEY_HASH_MD5,
-       PKEY_HASH_SHA1,
-       PKEY_HASH_RIPE_MD_160,
-       PKEY_HASH_SHA256,
-       PKEY_HASH_SHA384,
-       PKEY_HASH_SHA512,
-       PKEY_HASH_SHA224,
-       PKEY_HASH__LAST
-};
-
-extern const char *const pkey_hash_algo[PKEY_HASH__LAST];
+/* asymmetric key implementation supports only up to SHA224 */
+#define PKEY_HASH__LAST                (HASH_ALGO_SHA224 + 1)
 
 enum pkey_id_type {
        PKEY_ID_PGP,            /* OpenPGP generated key ID */
@@ -44,7 +35,7 @@ enum pkey_id_type {
        PKEY_ID_TYPE__LAST
 };
 
-extern const char *const pkey_id_type[PKEY_ID_TYPE__LAST];
+extern const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST];
 
 /*
  * Cryptographic data for the public-key subtype of the asymmetric key type.
@@ -59,6 +50,7 @@ struct public_key {
 #define PKEY_CAN_DECRYPT       0x02
 #define PKEY_CAN_SIGN          0x04
 #define PKEY_CAN_VERIFY                0x08
+       enum pkey_algo pkey_algo : 8;
        enum pkey_id_type id_type : 8;
        union {
                MPI     mpi[5];
@@ -88,7 +80,8 @@ struct public_key_signature {
        u8 *digest;
        u8 digest_size;                 /* Number of bytes in digest */
        u8 nr_mpi;                      /* Occupancy of mpi[] */
-       enum pkey_hash_algo pkey_hash_algo : 8;
+       enum pkey_algo pkey_algo : 8;
+       enum hash_algo pkey_hash_algo : 8;
        union {
                MPI mpi[2];
                struct {
diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h
new file mode 100644 (file)
index 0000000..d69bc8a
--- /dev/null
@@ -0,0 +1,25 @@
+/* Big capacity key type.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_BIG_KEY_TYPE_H
+#define _KEYS_BIG_KEY_TYPE_H
+
+#include <linux/key-type.h>
+
+extern struct key_type key_type_big_key;
+
+extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep);
+extern void big_key_revoke(struct key *key);
+extern void big_key_destroy(struct key *key);
+extern void big_key_describe(const struct key *big_key, struct seq_file *m);
+extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen);
+
+#endif /* _KEYS_BIG_KEY_TYPE_H */
index cf49159b0e3a4f47c7890a122ae44ea0dc35a5da..fca5c62340a47fbbb25386002d7c8293fab7a40b 100644 (file)
@@ -1,6 +1,6 @@
 /* Keyring key type
  *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
 #define _KEYS_KEYRING_TYPE_H
 
 #include <linux/key.h>
-#include <linux/rcupdate.h>
-
-/*
- * the keyring payload contains a list of the keys to which the keyring is
- * subscribed
- */
-struct keyring_list {
-       struct rcu_head rcu;            /* RCU deletion hook */
-       unsigned short  maxkeys;        /* max keys this list can hold */
-       unsigned short  nkeys;          /* number of keys currently held */
-       unsigned short  delkey;         /* key to be unlinked by RCU */
-       struct key __rcu *keys[0];
-};
-
+#include <linux/assoc_array.h>
 
 #endif /* _KEYS_KEYRING_TYPE_H */
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
new file mode 100644 (file)
index 0000000..8dabc39
--- /dev/null
@@ -0,0 +1,23 @@
+/* System keyring containing trusted public keys.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_SYSTEM_KEYRING_H
+#define _KEYS_SYSTEM_KEYRING_H
+
+#ifdef CONFIG_SYSTEM_TRUSTED_KEYRING
+
+#include <linux/key.h>
+
+extern struct key *system_trusted_keyring;
+
+#endif
+
+#endif /* _KEYS_SYSTEM_KEYRING_H */
index b0972c4ce81c3cc7e8ad1bbb03c5dc9054f2f32a..d9099b15b4726404343308a39e5e55d39a16aa05 100644 (file)
 #include <acpi/acpi_numa.h>
 #include <asm/acpi.h>
 
+static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
+{
+       return adev ? adev->handle : NULL;      /* NULL adev gives a NULL handle */
+}
+
+#define ACPI_COMPANION(dev)            ((dev)->acpi_node.companion)    /* dev's acpi_device pointer */
+#define ACPI_COMPANION_SET(dev, adev)  ACPI_COMPANION(dev) = (adev)    /* bare assignment: use as a statement */
+#define ACPI_HANDLE(dev)               acpi_device_handle(ACPI_COMPANION(dev)) /* NULL when the companion is NULL */
+
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+       return dev_name(&adev->dev);    /* adev is not NULL-checked here */
+}
+
 enum acpi_irq_model_id {
        ACPI_IRQ_MODEL_PIC = 0,
        ACPI_IRQ_MODEL_IOAPIC,
@@ -401,6 +415,15 @@ static inline bool acpi_driver_match_device(struct device *dev,
 
 #define acpi_disabled 1
 
+#define ACPI_COMPANION(dev)            (NULL)
+#define ACPI_COMPANION_SET(dev, adev)  do { } while (0)
+#define ACPI_HANDLE(dev)               (NULL)
+
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+       return NULL;
+}
+
 static inline void acpi_early_init(void) { }
 
 static inline int early_acpi_boot_init(void)
diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
new file mode 100644 (file)
index 0000000..9a193b8
--- /dev/null
@@ -0,0 +1,92 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_H
+#define _LINUX_ASSOC_ARRAY_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/types.h>
+
+#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */
+
+/*
+ * Generic associative array.
+ */
+struct assoc_array {
+       struct assoc_array_ptr  *root;          /* The node at the root of the tree */
+       unsigned long           nr_leaves_on_tree;      /* Leaf count; zeroed by assoc_array_init() */
+};
+
+/*
+ * Operations on objects and index keys for use by array manipulation routines.
+ */
+struct assoc_array_ops {
+       /* Method to get a chunk of an index key from caller-supplied data */
+       unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+       /* Method to get a piece of an object's index key */
+       unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+       /* Is this the object we're looking for? */
+       bool (*compare_object)(const void *object, const void *index_key);
+
+       /* How different are two objects, to a bit position in their keys? (or
+        * -1 if they're the same)
+        */
+       int (*diff_objects)(const void *a, const void *b);
+
+       /* Method to free an object. */
+       void (*free_object)(void *object);
+};
+
+/*
+ * Access and manipulation functions.
+ */
+struct assoc_array_edit;
+
+static inline void assoc_array_init(struct assoc_array *array)
+{
+       array->root = NULL;             /* start with no root node */
+       array->nr_leaves_on_tree = 0;   /* and a leaf count of zero */
+}
+
+extern int assoc_array_iterate(const struct assoc_array *array,
+                              int (*iterator)(const void *object,
+                                              void *iterator_data),
+                              void *iterator_data);
+extern void *assoc_array_find(const struct assoc_array *array,
+                             const struct assoc_array_ops *ops,
+                             const void *index_key);
+extern void assoc_array_destroy(struct assoc_array *array,
+                               const struct assoc_array_ops *ops);
+extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+                                                  const struct assoc_array_ops *ops,
+                                                  const void *index_key,
+                                                  void *object);
+extern void assoc_array_insert_set_object(struct assoc_array_edit *edit,
+                                         void *object);
+extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+                                                  const struct assoc_array_ops *ops,
+                                                  const void *index_key);
+extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+                                                 const struct assoc_array_ops *ops);
+extern void assoc_array_apply_edit(struct assoc_array_edit *edit);
+extern void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+extern int assoc_array_gc(struct assoc_array *array,
+                         const struct assoc_array_ops *ops,
+                         bool (*iterator)(void *object, void *iterator_data),
+                         void *iterator_data);
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_H */
diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h
new file mode 100644 (file)
index 0000000..711275e
--- /dev/null
@@ -0,0 +1,182 @@
+/* Private definitions for the generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_PRIV_H
+#define _LINUX_ASSOC_ARRAY_PRIV_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/assoc_array.h>
+
+#define ASSOC_ARRAY_FAN_OUT            16      /* Number of slots per node */
+#define ASSOC_ARRAY_FAN_MASK           (ASSOC_ARRAY_FAN_OUT - 1)       /* 0xf: selects one slot */
+#define ASSOC_ARRAY_LEVEL_STEP         (ilog2(ASSOC_ARRAY_FAN_OUT))    /* 4 key bits per level */
+#define ASSOC_ARRAY_LEVEL_STEP_MASK    (ASSOC_ARRAY_LEVEL_STEP - 1)    /* 3 */
+#define ASSOC_ARRAY_KEY_CHUNK_MASK     (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1)        /* bit offset within a key chunk */
+#define ASSOC_ARRAY_KEY_CHUNK_SHIFT    (ilog2(BITS_PER_LONG))  /* log2 of the bits in one long */
+
+/*
+ * Undefined type representing a pointer with type information in the bottom
+ * two bits.
+ */
+struct assoc_array_ptr;
+
+/*
+ * An N-way node in the tree.
+ *
+ * Each slot contains one of four things:
+ *
+ *     (1) Nothing (NULL).
+ *
+ *     (2) A leaf object (pointer type 0).
+ *
+ *     (3) A next-level node (pointer type 1, subtype 0).
+ *
+ *     (4) A shortcut (pointer type 1, subtype 1).
+ *
+ * The tree is optimised for search-by-ID, but permits reasonable iteration
+ * also.
+ *
+ * The tree is navigated by constructing an index key consisting of an array of
+ * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size.
+ *
+ * The segments correspond to levels of the tree (the first segment is used at
+ * level 0, the second at level 1, etc.).
+ */
+struct assoc_array_node {
+       struct assoc_array_ptr  *back_pointer;
+       u8                      parent_slot;
+       struct assoc_array_ptr  *slots[ASSOC_ARRAY_FAN_OUT];    /* one of the four slot kinds above */
+       unsigned long           nr_leaves_on_branch;
+};
+
+/*
+ * A shortcut through the index space out to where a collection of nodes/leaves
+ * with the same IDs live.
+ */
+struct assoc_array_shortcut {
+       struct assoc_array_ptr  *back_pointer;
+       int                     parent_slot;
+       int                     skip_to_level;
+       struct assoc_array_ptr  *next_node;
+       unsigned long           index_key[];
+};
+
+/*
+ * Preallocation cache.
+ */
+struct assoc_array_edit {
+       struct rcu_head                 rcu;
+       struct assoc_array              *array;
+       const struct assoc_array_ops    *ops;
+       const struct assoc_array_ops    *ops_for_excised_subtree;
+       struct assoc_array_ptr          *leaf;
+       struct assoc_array_ptr          **leaf_p;
+       struct assoc_array_ptr          *dead_leaf;
+       struct assoc_array_ptr          *new_meta[3];
+       struct assoc_array_ptr          *excised_meta[1];
+       struct assoc_array_ptr          *excised_subtree;
+       struct assoc_array_ptr          **set_backpointers[ASSOC_ARRAY_FAN_OUT];
+       struct assoc_array_ptr          *set_backpointers_to;
+       struct assoc_array_node         *adjust_count_on;
+       long                            adjust_count_by;
+       struct {
+               struct assoc_array_ptr  **ptr;
+               struct assoc_array_ptr  *to;
+       } set[2];
+       struct {
+               u8                      *p;
+               u8                      to;
+       } set_parent_slot[1];
+       u8                              segment_cache[ASSOC_ARRAY_FAN_OUT + 1];
+};
+
+/*
+ * Internal tree member pointers are marked in the bottom one or two bits to
+ * indicate what type they are so that we don't have to look behind every
+ * pointer to see what it points to.
+ *
+ * We provide functions to test type annotations and to create and translate
+ * the annotated pointers.
+ */
+#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL        /* Bit 0: leaf vs meta */
+#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL        /* Points to leaf (or nowhere) */
+#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL        /* Points to node or shortcut */
+#define ASSOC_ARRAY_PTR_SUBTYPE_MASK   0x2UL   /* Bit 1: node vs shortcut */
+#define ASSOC_ARRAY_PTR_NODE_SUBTYPE   0x0UL   /* Meta pointer to a node */
+#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL /* Meta pointer to a shortcut */
+
+static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x)
+{
+       return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK;    /* true when bit 0 is set */
+}
+static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x)
+{
+       return !assoc_array_ptr_is_meta(x);     /* bit 0 clear; a NULL pointer also qualifies */
+}
+static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x)
+{
+       return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK; /* tests bit 1 only; bit 0 is not checked */
+}
+static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x)
+{
+       return !assoc_array_ptr_is_shortcut(x); /* bit 1 clear; bit 0 is not checked */
+}
+
+static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x)
+{
+       return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK); /* clear bit 0 only */
+}
+
+static inline
+unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x)
+{
+       return (unsigned long)x &       /* clear both tag bits (0 and 1) */
+               ~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK);
+}
+static inline
+struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x)
+{
+       return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x);
+}
+static inline
+struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x)
+{
+       return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x);
+}
+
+static inline
+struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t)
+{
+       return (struct assoc_array_ptr *)((unsigned long)p | t);        /* OR tag t into the address */
+}
+static inline
+struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p)
+{
+       return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE);    /* tag 0x0: address unchanged */
+}
+static inline
+struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p)
+{
+       return __assoc_array_x_to_ptr(
+               p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE);   /* tag 0x1 */
+}
+static inline
+struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p)
+{
+       return __assoc_array_x_to_ptr(
+               p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE);       /* tag 0x3 */
+}
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */
index 729a4d165bcc52b444ee0ea89e8faac36f6518f1..a40641954c296c3042c0e34f5c1ff170aee5cfa5 100644 (file)
@@ -73,6 +73,8 @@ struct audit_field {
        void                            *lsm_rule;
 };
 
+extern int is_audit_feature_set(int which);
+
 extern int __init audit_register_class(int class, unsigned *list);
 extern int audit_classify_syscall(int abi, unsigned syscall);
 extern int audit_classify_arch(int arch);
@@ -207,7 +209,7 @@ static inline int audit_get_sessionid(struct task_struct *tsk)
 
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
-extern int __audit_bprm(struct linux_binprm *bprm);
+extern void __audit_bprm(struct linux_binprm *bprm);
 extern int __audit_socketcall(int nargs, unsigned long *args);
 extern int __audit_sockaddr(int len, void *addr);
 extern void __audit_fd_pair(int fd1, int fd2);
@@ -236,11 +238,10 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
        if (unlikely(!audit_dummy_context()))
                __audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
-static inline int audit_bprm(struct linux_binprm *bprm)
+static inline void audit_bprm(struct linux_binprm *bprm)
 {
        if (unlikely(!audit_dummy_context()))
-               return __audit_bprm(bprm);
-       return 0;
+               __audit_bprm(bprm);
 }
 static inline int audit_socketcall(int nargs, unsigned long *args)
 {
@@ -367,10 +368,8 @@ static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
                                        gid_t gid, umode_t mode)
 { }
-static inline int audit_bprm(struct linux_binprm *bprm)
-{
-       return 0;
-}
+static inline void audit_bprm(struct linux_binprm *bprm)
+{ }
 static inline int audit_socketcall(int nargs, unsigned long *args)
 {
        return 0;
index f26ec20f635476a88c8879baad16a5a908433fa7..1b135d49b27985d3243cdbf92d6002ce5eea8597 100644 (file)
@@ -505,6 +505,9 @@ struct request_queue {
                                 (1 << QUEUE_FLAG_SAME_COMP)    |       \
                                 (1 << QUEUE_FLAG_ADD_RANDOM))
 
+#define QUEUE_FLAG_MQ_DEFAULT  ((1 << QUEUE_FLAG_IO_STAT) |            \
+                                (1 << QUEUE_FLAG_SAME_COMP))
+
 static inline void queue_lockdep_assert_held(struct request_queue *q)
 {
        if (q->queue_lock)
index b025925df7f75a5b86fa894de93e2e83d0bdf571..952b01033c32dedcf83349a988b920c1c5ed8aa8 100644 (file)
@@ -644,9 +644,11 @@ struct device_dma_parameters {
        unsigned long segment_boundary_mask;
 };
 
+struct acpi_device;
+
 struct acpi_dev_node {
 #ifdef CONFIG_ACPI
-       void    *handle;
+       struct acpi_device *companion;
 #endif
 };
 
@@ -790,14 +792,6 @@ static inline struct device *kobj_to_dev(struct kobject *kobj)
        return container_of(kobj, struct device, kobj);
 }
 
-#ifdef CONFIG_ACPI
-#define ACPI_HANDLE(dev)       ((dev)->acpi_node.handle)
-#define ACPI_HANDLE_SET(dev, _handle_) (dev)->acpi_node.handle = (_handle_)
-#else
-#define ACPI_HANDLE(dev)       (NULL)
-#define ACPI_HANDLE_SET(dev, _handle_) do { } while (0)
-#endif
-
 /* Get the wakeup routines, which depend on struct device */
 #include <linux/pm_wakeup.h>
 
index 0bc727534108d5a2d5d527e75eaa8020a3ccd239..41cf0c399288e022edf32f7e65c6f151004829d9 100644 (file)
@@ -45,13 +45,13 @@ static inline int dma_submit_error(dma_cookie_t cookie)
 
 /**
  * enum dma_status - DMA transaction status
- * @DMA_SUCCESS: transaction completed successfully
+ * @DMA_COMPLETE: transaction completed
  * @DMA_IN_PROGRESS: transaction not yet processed
  * @DMA_PAUSED: transaction is paused
  * @DMA_ERROR: transaction failed
  */
 enum dma_status {
-       DMA_SUCCESS,
+       DMA_COMPLETE,
        DMA_IN_PROGRESS,
        DMA_PAUSED,
        DMA_ERROR,
@@ -171,12 +171,6 @@ struct dma_interleaved_template {
  * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client
  *  acknowledges receipt, i.e. has has a chance to establish any dependency
  *  chains
- * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
- * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
- * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
- *     (if not set, do the source dma-unmapping as page)
- * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
- *     (if not set, do the destination dma-unmapping as page)
  * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
  * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
  * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
@@ -188,14 +182,10 @@ struct dma_interleaved_template {
 enum dma_ctrl_flags {
        DMA_PREP_INTERRUPT = (1 << 0),
        DMA_CTRL_ACK = (1 << 1),
-       DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
-       DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
-       DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
-       DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
-       DMA_PREP_PQ_DISABLE_P = (1 << 6),
-       DMA_PREP_PQ_DISABLE_Q = (1 << 7),
-       DMA_PREP_CONTINUE = (1 << 8),
-       DMA_PREP_FENCE = (1 << 9),
+       DMA_PREP_PQ_DISABLE_P = (1 << 2),
+       DMA_PREP_PQ_DISABLE_Q = (1 << 3),
+       DMA_PREP_CONTINUE = (1 << 4),
+       DMA_PREP_FENCE = (1 << 5),
 };
 
 /**
@@ -413,6 +403,17 @@ void dma_chan_cleanup(struct kref *kref);
 typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
 
 typedef void (*dma_async_tx_callback)(void *dma_async_param);
+
+struct dmaengine_unmap_data {
+       u8 to_cnt;      /* NOTE(review): the three *_cnt fields presumably count addr[] entries per direction - confirm */
+       u8 from_cnt;
+       u8 bidi_cnt;
+       struct device *dev;
+       struct kref kref;       /* reference count; dma_set_unmap() takes a reference */
+       size_t len;
+       dma_addr_t addr[0];     /* trailing zero-length array of DMA addresses */
+};
+
 /**
  * struct dma_async_tx_descriptor - async transaction descriptor
  * ---dma generic offload fields---
@@ -438,6 +439,7 @@ struct dma_async_tx_descriptor {
        dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
        dma_async_tx_callback callback;
        void *callback_param;
+       struct dmaengine_unmap_data *unmap;
 #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
        struct dma_async_tx_descriptor *next;
        struct dma_async_tx_descriptor *parent;
@@ -445,6 +447,40 @@ struct dma_async_tx_descriptor {
 #endif
 };
 
+#ifdef CONFIG_DMA_ENGINE
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+                                struct dmaengine_unmap_data *unmap)
+{
+       kref_get(&unmap->kref);         /* take a reference before publishing the pointer */
+       tx->unmap = unmap;              /* any previous tx->unmap value is overwritten */
+}
+
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags);
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap);
+#else
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+                                struct dmaengine_unmap_data *unmap)
+{
+}
+static inline struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+       return NULL;
+}
+static inline void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+}
+#endif
+
+static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx)
+{
+       if (tx->unmap) {                /* nothing to do when no unmap data is attached */
+               dmaengine_unmap_put(tx->unmap);
+               tx->unmap = NULL;       /* so a second call does nothing */
+       }
+}
+
 #ifndef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
 static inline void txd_lock(struct dma_async_tx_descriptor *txd)
 {
@@ -979,10 +1015,10 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
 {
        if (last_complete <= last_used) {
                if ((cookie <= last_complete) || (cookie > last_used))
-                       return DMA_SUCCESS;
+                       return DMA_COMPLETE;
        } else {
                if ((cookie <= last_complete) && (cookie > last_used))
-                       return DMA_SUCCESS;
+                       return DMA_COMPLETE;
        }
        return DMA_IN_PROGRESS;
 }
@@ -1013,11 +1049,11 @@ static inline struct dma_chan *dma_find_channel(enum dma_transaction_type tx_typ
 }
 static inline enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
 {
-       return DMA_SUCCESS;
+       return DMA_COMPLETE;
 }
 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
-       return DMA_SUCCESS;
+       return DMA_COMPLETE;
 }
 static inline void dma_issue_pending_all(void)
 {
index bf5d574ebdf4c94da0238c14add54bc51587175a..121f11f001c06f1cdcdeb6533234d76626a2915b 100644 (file)
@@ -2622,7 +2622,9 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
 extern int simple_write_end(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *page, void *fsdata);
+extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
+extern const struct dentry_operations simple_dentry_operations;
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
index acd2010328f3044b1cf6dd788e4ac09ab9262925..9649ff0c63f8d0bb5253cd08b3f32499986b4cfd 100644 (file)
@@ -31,6 +31,7 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
 int PageHuge(struct page *page);
+int PageHeadHuge(struct page *page_head);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -69,7 +70,6 @@ int dequeue_hwpoisoned_huge_page(struct page *page);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 void putback_active_hugepage(struct page *page);
 bool is_hugepage_active(struct page *page);
-void copy_huge_page(struct page *dst, struct page *src);
 
 #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
@@ -104,6 +104,11 @@ static inline int PageHuge(struct page *page)
        return 0;
 }
 
+static inline int PageHeadHuge(struct page *page_head)
+{
+       return 0;
+}
+
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 }
@@ -140,9 +145,6 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page)
 #define isolate_huge_page(p, l) false
 #define putback_active_hugepage(p)     do {} while (0)
 #define is_hugepage_active(x)  false
-static inline void copy_huge_page(struct page *dst, struct page *src)
-{
-}
 
 static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                unsigned long address, unsigned long end, pgprot_t newprot)
index 518a53afb9ea24b301d87d4af6c824bca5fc5548..a74c3a84dfdd05223cc0613c9445bdc396e1afe9 100644 (file)
@@ -45,6 +45,7 @@ struct key_preparsed_payload {
        const void      *data;          /* Raw data */
        size_t          datalen;        /* Raw datalen */
        size_t          quotalen;       /* Quota length for proposed payload */
+       bool            trusted;        /* True if key is trusted */
 };
 
 typedef int (*request_key_actor_t)(struct key_construction *key,
@@ -63,6 +64,11 @@ struct key_type {
         */
        size_t def_datalen;
 
+       /* Default key search algorithm. */
+       unsigned def_lookup_type;
+#define KEYRING_SEARCH_LOOKUP_DIRECT   0x0000  /* Direct lookup by description. */
+#define KEYRING_SEARCH_LOOKUP_ITERATE  0x0001  /* Iterative search. */
+
        /* vet a description */
        int (*vet_description)(const char *description);
 
index 4dfde1161c5e7878565d05ad7f5293e9e4d19cef..80d677483e31f95ce4b08ef00ffb79c239932c18 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/sysctl.h>
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
+#include <linux/assoc_array.h>
 
 #ifdef __KERNEL__
 #include <linux/uidgid.h>
@@ -82,6 +83,12 @@ struct key_owner;
 struct keyring_list;
 struct keyring_name;
 
+struct keyring_index_key {     /* overlaid on struct key's type/description pair via an anonymous union */
+       struct key_type         *type;
+       const char              *description;
+       size_t                  desc_len;       /* NOTE(review): presumably the length of description - confirm */
+};
+
 /*****************************************************************************/
 /*
  * key reference with possession attribute handling
@@ -99,7 +106,7 @@ struct keyring_name;
 typedef struct __key_reference_with_attributes *key_ref_t;
 
 static inline key_ref_t make_key_ref(const struct key *key,
-                                    unsigned long possession)
+                                    bool possession)
 {
        return (key_ref_t) ((unsigned long) key | possession);
 }
@@ -109,7 +116,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref)
        return (struct key *) ((unsigned long) key_ref & ~1UL);
 }
 
-static inline unsigned long is_key_possessed(const key_ref_t key_ref)
+static inline bool is_key_possessed(const key_ref_t key_ref)
 {
        return (unsigned long) key_ref & 1UL;
 }
@@ -129,7 +136,6 @@ struct key {
                struct list_head graveyard_link;
                struct rb_node  serial_node;
        };
-       struct key_type         *type;          /* type of key */
        struct rw_semaphore     sem;            /* change vs change sem */
        struct key_user         *user;          /* owner of this key */
        void                    *security;      /* security data for this key */
@@ -162,13 +168,21 @@ struct key {
 #define KEY_FLAG_NEGATIVE      5       /* set if key is negative */
 #define KEY_FLAG_ROOT_CAN_CLEAR        6       /* set if key can be cleared by root without permission */
 #define KEY_FLAG_INVALIDATED   7       /* set if key has been invalidated */
+#define KEY_FLAG_TRUSTED       8       /* set if key is trusted */
+#define KEY_FLAG_TRUSTED_ONLY  9       /* set if keyring only accepts links to trusted keys */
 
-       /* the description string
-        * - this is used to match a key against search criteria
-        * - this should be a printable string
+       /* the key type and key description string
+        * - the desc is used to match a key against search criteria
+        * - it should be a printable string
         * - eg: for krb5 AFS, this might be "afs@REDHAT.COM"
         */
-       char                    *description;
+       union {
+               struct keyring_index_key index_key;
+               struct {
+                       struct key_type *type;          /* type of key */
+                       char            *description;
+               };
+       };
 
        /* type specific data
         * - this is used by the keyring type to index the name
@@ -185,11 +199,14 @@ struct key {
         *   whatever
         */
        union {
-               unsigned long           value;
-               void __rcu              *rcudata;
-               void                    *data;
-               struct keyring_list __rcu *subscriptions;
-       } payload;
+               union {
+                       unsigned long           value;
+                       void __rcu              *rcudata;
+                       void                    *data;
+                       void                    *data2[2];
+               } payload;
+               struct assoc_array keys;
+       };
 };
 
 extern struct key *key_alloc(struct key_type *type,
@@ -203,18 +220,23 @@ extern struct key *key_alloc(struct key_type *type,
 #define KEY_ALLOC_IN_QUOTA     0x0000  /* add to quota, reject if would overrun */
 #define KEY_ALLOC_QUOTA_OVERRUN        0x0001  /* add to quota, permit even if overrun */
 #define KEY_ALLOC_NOT_IN_QUOTA 0x0002  /* not in quota */
+#define KEY_ALLOC_TRUSTED      0x0004  /* Key should be flagged as trusted */
 
 extern void key_revoke(struct key *key);
 extern void key_invalidate(struct key *key);
 extern void key_put(struct key *key);
 
-static inline struct key *key_get(struct key *key)
+static inline struct key *__key_get(struct key *key)
 {
-       if (key)
-               atomic_inc(&key->usage);
+       atomic_inc(&key->usage);
        return key;
 }
 
+static inline struct key *key_get(struct key *key)
+{
+       return key ? __key_get(key) : key;      /* NULL is returned as-is, without touching a refcount */
+}
+
 static inline void key_ref_put(key_ref_t key_ref)
 {
        key_put(key_ref_to_ptr(key_ref));
index 0548eb201e058afdd32f2645240633ff7a8783d4..1cedd000cf293f4486575b87086b01e2ee1b26e5 100644 (file)
@@ -1318,7 +1318,6 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 
 #if USE_SPLIT_PTE_PTLOCKS
 #if BLOATED_SPINLOCKS
-void __init ptlock_cache_init(void);
 extern bool ptlock_alloc(struct page *page);
 extern void ptlock_free(struct page *page);
 
@@ -1327,7 +1326,6 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
        return page->ptl;
 }
 #else /* BLOATED_SPINLOCKS */
-static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_alloc(struct page *page)
 {
        return true;
@@ -1380,17 +1378,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
        return &mm->page_table_lock;
 }
-static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct page *page) { return true; }
 static inline void pte_lock_deinit(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
-static inline void pgtable_init(void)
-{
-       ptlock_cache_init();
-       pgtable_cache_init();
-}
-
 static inline bool pgtable_page_ctor(struct page *page)
 {
        inc_zone_page_state(page, NR_PAGETABLE);
index 10f5a7272b8046ff7561b6d78a3b665828415ef1..bd299418a934e21b99c303af82a7c2f427bbf915 100644 (file)
@@ -44,18 +44,22 @@ struct page {
        /* First double word block */
        unsigned long flags;            /* Atomic flags, some possibly
                                         * updated asynchronously */
-       struct address_space *mapping;  /* If low bit clear, points to
-                                        * inode address_space, or NULL.
-                                        * If page mapped as anonymous
-                                        * memory, low bit is set, and
-                                        * it points to anon_vma object:
-                                        * see PAGE_MAPPING_ANON below.
-                                        */
+       union {
+               struct address_space *mapping;  /* If low bit clear, points to
+                                                * inode address_space, or NULL.
+                                                * If page mapped as anonymous
+                                                * memory, low bit is set, and
+                                                * it points to anon_vma object:
+                                                * see PAGE_MAPPING_ANON below.
+                                                */
+               void *s_mem;                    /* slab first object */
+       };
+
        /* Second double word */
        struct {
                union {
                        pgoff_t index;          /* Our offset within mapping. */
-                       void *freelist;         /* slub/slob first free object */
+                       void *freelist;         /* sl[aou]b first free object */
                        bool pfmemalloc;        /* If set by the page allocator,
                                                 * ALLOC_NO_WATERMARKS was set
                                                 * and the low watermark was not
@@ -65,9 +69,6 @@ struct page {
                                                 * this page is only used to
                                                 * free other pages.
                                                 */
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
-               pgtable_t pmd_huge_pte; /* protected by page->ptl */
-#endif
                };
 
                union {
@@ -114,6 +115,7 @@ struct page {
                                };
                                atomic_t _count;                /* Usage count, see below. */
                        };
+                       unsigned int active;    /* SLAB */
                };
        };
 
@@ -135,6 +137,12 @@ struct page {
 
                struct list_head list;  /* slobs list of pages */
                struct slab *slab_page; /* slab fields */
+               struct rcu_head rcu_head;       /* Used by SLAB
+                                                * when destroying via RCU
+                                                */
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
+               pgtable_t pmd_huge_pte; /* protected by page->ptl */
+#endif
        };
 
        /* Remainder is not double word aligned */
index d006f0ca60f46e92705fb8b3fd315ebacc5242b2..5a462c4e5009d68960525d2167728d59ee75e33b 100644 (file)
@@ -27,7 +27,7 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
        while (!pci_is_root_bus(pbus))
                pbus = pbus->parent;
 
-       return DEVICE_ACPI_HANDLE(pbus->bridge);
+       return ACPI_HANDLE(pbus->bridge);
 }
 
 static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
@@ -39,7 +39,7 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
        else
                dev = &pbus->self->dev;
 
-       return DEVICE_ACPI_HANDLE(dev);
+       return ACPI_HANDLE(dev);
 }
 
 void acpi_pci_add_bus(struct pci_bus *bus);
index 179fb91bb5f2eaef7354e37411628f90a6c02335..f50821cb64be8c11165659b0f0719398bfa004cf 100644 (file)
@@ -67,10 +67,10 @@ struct edmacc_param {
 #define ITCCHEN                BIT(23)
 
 /*ch_status paramater of callback function possible values*/
-#define DMA_COMPLETE 1
-#define DMA_CC_ERROR 2
-#define DMA_TC1_ERROR 3
-#define DMA_TC2_ERROR 4
+#define EDMA_DMA_COMPLETE 1
+#define EDMA_DMA_CC_ERROR 2
+#define EDMA_DMA_TC1_ERROR 3
+#define EDMA_DMA_TC2_ERROR 4
 
 enum address_mode {
        INCR = 0,
index 9d37e2b9d3ec030f026117d99de94dfd50b32ab0..5623a7f965b7bbb07ab94a72351aec027ef4adb7 100644 (file)
@@ -1052,17 +1052,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @xfrm_policy_delete_security:
  *     @ctx contains the xfrm_sec_ctx.
  *     Authorize deletion of xp->security.
- * @xfrm_state_alloc_security:
+ * @xfrm_state_alloc:
  *     @x contains the xfrm_state being added to the Security Association
  *     Database by the XFRM system.
  *     @sec_ctx contains the security context information being provided by
  *     the user-level SA generation program (e.g., setkey or racoon).
- *     @secid contains the secid from which to take the mls portion of the context.
  *     Allocate a security structure to the x->security field; the security
  *     field is initialized to NULL when the xfrm_state is allocated. Set the
- *     context to correspond to either sec_ctx or polsec, with the mls portion
- *     taken from secid in the latter case.
- *     Return 0 if operation was successful (memory to allocate, legal context).
+ *     context to correspond to sec_ctx. Return 0 if operation was successful
+ *     (memory to allocate, legal context).
+ * @xfrm_state_alloc_acquire:
+ *     @x contains the xfrm_state being added to the Security Association
+ *     Database by the XFRM system.
+ *     @polsec contains the policy's security context.
+ *     @secid contains the secid from which to take the mls portion of the
+ *     context.
+ *     Allocate a security structure to the x->security field; the security
+ *     field is initialized to NULL when the xfrm_state is allocated. Set the
+ *     context to correspond to secid. Return 0 if operation was successful
+ *     (memory to allocate, legal context).
  * @xfrm_state_free_security:
  *     @x contains the xfrm_state.
  *     Deallocate x->security.
@@ -1679,9 +1687,11 @@ struct security_operations {
        int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx);
        void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx);
        int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx);
-       int (*xfrm_state_alloc_security) (struct xfrm_state *x,
-               struct xfrm_user_sec_ctx *sec_ctx,
-               u32 secid);
+       int (*xfrm_state_alloc) (struct xfrm_state *x,
+                                struct xfrm_user_sec_ctx *sec_ctx);
+       int (*xfrm_state_alloc_acquire) (struct xfrm_state *x,
+                                        struct xfrm_sec_ctx *polsec,
+                                        u32 secid);
        void (*xfrm_state_free_security) (struct xfrm_state *x);
        int (*xfrm_state_delete_security) (struct xfrm_state *x);
        int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
index 1e8a8b6e837d8621fa5488f832273df673ff3520..cf87a24c0f92f1963081c93d7b0cce2356025358 100644 (file)
@@ -354,6 +354,35 @@ static inline void read_sequnlock_excl(seqlock_t *sl)
        spin_unlock(&sl->lock);
 }
 
+/**
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
+ * @lock: sequence lock
+ * @seq : sequence number to be checked
+ *
+ * When *@seq is even, start a lockless pass and store the value returned
+ * by read_seqbegin() in *@seq.  When *@seq is odd, take the lock with
+ * read_seqlock_excl() instead and leave *@seq untouched.
+ * N.B. seq must be initialized to an even number to begin with.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+       if (*seq & 1)           /* Odd: locked pass */
+               read_seqlock_excl(lock);
+       else                    /* Even: lockless pass */
+               *seq = read_seqbegin(lock);
+}
+
+/* True only when @seq is even and read_seqretry() returns non-zero. */
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+       if (seq & 1)
+               return 0;
+       return !!read_seqretry(lock, seq);
+}
+
+/* Calls read_sequnlock_excl() when @seq is odd; does nothing when it is even. */
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+       if (!(seq & 1))
+               return;
+       read_sequnlock_excl(lock);
+}
+
 static inline void read_seqlock_excl_bh(seqlock_t *sl)
 {
        spin_lock_bh(&sl->lock);
index 74f105847d13ceae757c8aa6b83bdf8412816696..c2bba248fa63d46024930f5b4d6c47b652a51dc6 100644 (file)
  *  }
  *  rcu_read_unlock();
  *
- * See also the comment on struct slab_rcu in mm/slab.c.
+ * This is useful if we need to approach a kernel structure obliquely,
+ * from its address obtained without the usual locking. We can lock
+ * the structure to stabilize it and check it's still at the given address,
+ * only if we can be sure that the memory has not been meanwhile reused
+ * for some other kind of object (which our subsystem's lock might corrupt).
+ *
+ * rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
  */
 #define SLAB_DESTROY_BY_RCU    0x00080000UL    /* Defer freeing slabs to RCU */
 #define SLAB_MEM_SPREAD                0x00100000UL    /* Spread some memory over cpuset */
index e9346b4f1ef4b2ef6d302a5799e30e631fa10ce2..09bfffb08a56db285caa27146202f04e2188b480 100644 (file)
@@ -27,8 +27,8 @@ struct kmem_cache {
 
        size_t colour;                  /* cache colouring range */
        unsigned int colour_off;        /* colour offset */
-       struct kmem_cache *slabp_cache;
-       unsigned int slab_size;
+       struct kmem_cache *freelist_cache;
+       unsigned int freelist_size;
 
        /* constructor func */
        void (*ctor)(void *obj);
index cc0b67eada4260331276e9ae145fa593c14ecee8..f56bfa9e4526f6382467fe2647c19bdbca185346 100644 (file)
@@ -11,7 +11,7 @@
 enum stat_item {
        ALLOC_FASTPATH,         /* Allocation from cpu slab */
        ALLOC_SLOWPATH,         /* Allocation by getting a new cpu slab */
-       FREE_FASTPATH,          /* Free to cpu slub */
+       FREE_FASTPATH,          /* Free to cpu slab */
        FREE_SLOWPATH,          /* Freeing not to cpu slab */
        FREE_FROZEN,            /* Freeing to frozen slab */
        FREE_ADD_PARTIAL,       /* Freeing moves slab to partial list */
index 4db29859464f3af3d78caafa41a1e7525034a589..4836ba3c1cd8266c294b9dfd28aa6d0d433db0d6 100644 (file)
@@ -27,6 +27,12 @@ struct user_namespace {
        kuid_t                  owner;
        kgid_t                  group;
        unsigned int            proc_inum;
+
+       /* Register of per-UID persistent keyrings for this namespace */
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       struct key              *persistent_keyring_register;
+       struct rw_semaphore     persistent_keyring_register_sem;
+#endif
 };
 
 extern struct user_namespace init_user_ns;
index 61939ba30aa0abdbe7e36e608343338c80fada6b..eaa00b10abaaa53cf441170841c3faec588e9de0 100644 (file)
@@ -278,6 +278,31 @@ do {                                                                       \
        __ret;                                                          \
 })
 
+/*
+ * Expand ___wait_event() in TASK_UNINTERRUPTIBLE state, passing
+ * "cmd1; schedule(); cmd2" as its last argument.  The value of the
+ * expansion is discarded with a (void) cast.
+ */
+#define __wait_event_cmd(wq, condition, cmd1, cmd2)                    \
+       (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,  \
+                           cmd1; schedule(); cmd2)
+
+/**
+ * wait_event_cmd - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @cmd1: the command will be executed before sleep
+ * @cmd2: the command will be executed after sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * If @condition is already true on entry, the macro finishes at once
+ * and neither @cmd1 nor @cmd2 is executed.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define wait_event_cmd(wq, condition, cmd1, cmd2)                      \
+do {                                                                   \
+       if (condition)                                                  \
+               break;                                                  \
+       __wait_event_cmd(wq, condition, cmd1, cmd2);                    \
+} while (0)
+
 #define __wait_event_interruptible(wq, condition)                      \
        ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,          \
                      schedule())
index f18b3b76e01e22e00c00ee133b2ebdc1013bcc62..4832d75dcbaedb888a2751303fe7d5a4e62b8010 100644 (file)
@@ -162,12 +162,14 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
                { EXTENT_FLAG_LOGGING,          "LOGGING"       },      \
                { EXTENT_FLAG_FILLING,          "FILLING"       })
 
-TRACE_EVENT(btrfs_get_extent,
+TRACE_EVENT_CONDITION(btrfs_get_extent,
 
        TP_PROTO(struct btrfs_root *root, struct extent_map *map),
 
        TP_ARGS(root, map),
 
+       TP_CONDITION(map),
+
        TP_STRUCT__entry(
                __field(        u64,  root_objectid     )
                __field(        u64,  start             )
index db0b825b48109f2e8cc011f211749043757a33e5..44b05a09f1933a1c293a2174eb58d4db7b473b80 100644 (file)
@@ -68,6 +68,9 @@
 #define AUDIT_MAKE_EQUIV       1015    /* Append to watched tree */
 #define AUDIT_TTY_GET          1016    /* Get TTY auditing status */
 #define AUDIT_TTY_SET          1017    /* Set TTY auditing status */
+#define AUDIT_SET_FEATURE      1018    /* Turn an audit feature on or off */
+#define AUDIT_GET_FEATURE      1019    /* Get which features are enabled */
+#define AUDIT_FEATURE_CHANGE   1020    /* audit log listing feature changes */
 
 #define AUDIT_FIRST_USER_MSG   1100    /* Userspace messages mostly uninteresting to kernel */
 #define AUDIT_USER_AVC         1107    /* We filter this differently */
@@ -357,6 +360,12 @@ enum {
 #define AUDIT_PERM_READ                4
 #define AUDIT_PERM_ATTR                8
 
+/* MAX_AUDIT_MESSAGE_LENGTH is set in audit:lib/libaudit.h as:
+ * 8970 // PATH_MAX*2+CONTEXT_SIZE*2+11+256+1
+ * max header+body+tailer: 44 + 29 + 32 + 262 + 7 + pad
+ */
+#define AUDIT_MESSAGE_TEXT_MAX 8560
+
 struct audit_status {
        __u32           mask;           /* Bit mask for valid entries */
        __u32           enabled;        /* 1 = enabled, 0 = disabled */
@@ -368,11 +377,28 @@ struct audit_status {
        __u32           backlog;        /* messages waiting in queue */
 };
 
+/*
+ * Audit feature bitmaps: one bit per feature number, see
+ * AUDIT_FEATURE_TO_MASK().
+ */
+struct audit_features {
+#define AUDIT_FEATURE_VERSION  1
+       __u32   vers;
+       __u32   mask;           /* which bits we are dealing with */
+       __u32   features;       /* which feature to enable/disable */
+       __u32   lock;           /* which features to lock */
+};
+
+/* Feature numbers; each one selects a bit in struct audit_features. */
+#define AUDIT_FEATURE_ONLY_UNSET_LOGINUID      0
+#define AUDIT_FEATURE_LOGINUID_IMMUTABLE       1
+#define AUDIT_LAST_FEATURE                     AUDIT_FEATURE_LOGINUID_IMMUTABLE
+
+#define audit_feature_valid(x)         ((x) >= 0 && (x) <= AUDIT_LAST_FEATURE)
+/* 1U, not 1: shifting a signed 1 into bit 31 is undefined behaviour */
+#define AUDIT_FEATURE_TO_MASK(x)       (1U << ((x) & 31)) /* mask for __u32 */
+
 struct audit_tty_status {
        __u32           enabled;        /* 1 = enabled, 0 = disabled */
        __u32           log_passwd;     /* 1 = enabled, 0 = disabled */
 };
 
+/* Parenthesised so the cast stays intact inside larger expressions. */
+#define AUDIT_UID_UNSET ((unsigned int)-1)
+
 /* audit_rule_data supports filter rules with both integer and string
  * fields.  It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
  * AUDIT_LIST_RULES requests.
diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h
new file mode 100644 (file)
index 0000000..ca18c45
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Hash Info: Hash algorithms information
+ *
+ * Copyright (c) 2013 Dmitry Kasatkin <d.kasatkin@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _UAPI_LINUX_HASH_INFO_H
+#define _UAPI_LINUX_HASH_INFO_H
+
+/*
+ * Hash algorithm identifiers.  No enumerator has an explicit value, so they
+ * number consecutively from 0 and HASH_ALGO__LAST equals the number of
+ * algorithms listed before it.
+ * NOTE(review): as this is a uapi header, new algorithms presumably have to
+ * be added directly before HASH_ALGO__LAST to keep existing values stable
+ * - confirm.
+ */
+enum hash_algo {
+       HASH_ALGO_MD4,
+       HASH_ALGO_MD5,
+       HASH_ALGO_SHA1,
+       HASH_ALGO_RIPE_MD_160,
+       HASH_ALGO_SHA256,
+       HASH_ALGO_SHA384,
+       HASH_ALGO_SHA512,
+       HASH_ALGO_SHA224,
+       HASH_ALGO_RIPE_MD_128,
+       HASH_ALGO_RIPE_MD_256,
+       HASH_ALGO_RIPE_MD_320,
+       HASH_ALGO_WP_256,
+       HASH_ALGO_WP_384,
+       HASH_ALGO_WP_512,
+       HASH_ALGO_TGR_128,
+       HASH_ALGO_TGR_160,
+       HASH_ALGO_TGR_192,
+       HASH_ALGO__LAST
+};
+
+#endif /* _UAPI_LINUX_HASH_INFO_H */
index c9b7f4faf97aa5a790f6b58fd6d9da25ca2ebd64..840cb990abe2e7147ec89c92ef143e86a35ce0f8 100644 (file)
@@ -56,5 +56,6 @@
 #define KEYCTL_REJECT                  19      /* reject a partially constructed key */
 #define KEYCTL_INSTANTIATE_IOV         20      /* instantiate a partially constructed key */
 #define KEYCTL_INVALIDATE              21      /* invalidate a key */
+#define KEYCTL_GET_PERSISTENT          22      /* get a user's persistent keyring */
 
 #endif /*  _LINUX_KEYCTL_H */
index fe1a5406d4d93cdf6012d11ab7315f014b552ede..f7cf7f351144873efd0bde3bdfe4e8f1676eb41d 100644 (file)
@@ -16,6 +16,7 @@
 #define _MD_P_H
 
 #include <linux/types.h>
+#include <asm/byteorder.h>
 
 /*
  * RAID superblock.
index 3fc8a2f2fac4462eb7b1d1c5c1c1b7c4c3cabdf7..79383d3aa5dc5f7fe64abdcaf7d511144ca016e5 100644 (file)
@@ -301,20 +301,6 @@ config AUDIT_TREE
        depends on AUDITSYSCALL
        select FSNOTIFY
 
-config AUDIT_LOGINUID_IMMUTABLE
-       bool "Make audit loginuid immutable"
-       depends on AUDIT
-       help
-         The config option toggles if a task setting its loginuid requires
-         CAP_SYS_AUDITCONTROL or if that task should require no special permissions
-         but should instead only allow setting its loginuid if it was never
-         previously set.  On systems which use systemd or a similar central
-         process to restart login services this should be set to true.  On older
-         systems in which an admin would typically have to directly stop and
-         start processes this should be set to false.  Setting this to true allows
-         one to drop potentially dangerous capabilites from the login tasks,
-         but may not be backwards compatible with older init systems.
-
 source "kernel/irq/Kconfig"
 source "kernel/time/Kconfig"
 
@@ -1669,6 +1655,18 @@ config BASE_SMALL
        default 0 if BASE_FULL
        default 1 if !BASE_FULL
 
+config SYSTEM_TRUSTED_KEYRING
+       bool "Provide system-wide ring of trusted keys"
+       depends on KEYS
+       help
+         Provide a system keyring to which trusted keys can be added.  Keys in
+         the keyring are considered to be trusted.  Keys may be added at will
+         by the kernel from compiled-in data and from hardware key stores, but
+         userspace may only add extra keys if those keys can be verified by
+         keys already in the keyring.
+
+         Keys in this keyring are used by module signature checking.
+
 menuconfig MODULES
        bool "Enable loadable module support"
        option modules
@@ -1742,6 +1740,7 @@ config MODULE_SRCVERSION_ALL
 config MODULE_SIG
        bool "Module signature verification"
        depends on MODULES
+       select SYSTEM_TRUSTED_KEYRING
        select KEYS
        select CRYPTO
        select ASYMMETRIC_KEY_TYPE
index 01573fdfa1868fd7fb320d472eff6217f39a07eb..febc511e078a65d08ac4dd6a3be67b26926195cd 100644 (file)
@@ -476,7 +476,7 @@ static void __init mm_init(void)
        mem_init();
        kmem_cache_init();
        percpu_init_late();
-       pgtable_init();
+       pgtable_cache_init();
        vmalloc_init();
 }
 
index d69739610fd4384323004c46782116d54db6bbd5..7a51443a51d6421bd2a02a66ec18db98f6796dd3 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -208,15 +208,18 @@ static void shm_open(struct vm_area_struct *vma)
  */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
+       struct file *shm_file;
+
+       shm_file = shp->shm_file;
+       shp->shm_file = NULL;
        ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
        shm_rmid(ns, shp);
        shm_unlock(shp);
-       if (!is_file_hugepages(shp->shm_file))
-               shmem_lock(shp->shm_file, 0, shp->mlock_user);
+       if (!is_file_hugepages(shm_file))
+               shmem_lock(shm_file, 0, shp->mlock_user);
        else if (shp->mlock_user)
-               user_shm_unlock(file_inode(shp->shm_file)->i_size,
-                                               shp->mlock_user);
-       fput (shp->shm_file);
+               user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user);
+       fput(shm_file);
        ipc_rcu_putref(shp, shm_rcu_free);
 }
 
@@ -974,15 +977,25 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                ipc_lock_object(&shp->shm_perm);
                if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
                        kuid_t euid = current_euid();
-                       err = -EPERM;
                        if (!uid_eq(euid, shp->shm_perm.uid) &&
-                           !uid_eq(euid, shp->shm_perm.cuid))
+                           !uid_eq(euid, shp->shm_perm.cuid)) {
+                               err = -EPERM;
                                goto out_unlock0;
-                       if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
+                       }
+                       if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
+                               err = -EPERM;
                                goto out_unlock0;
+                       }
                }
 
                shm_file = shp->shm_file;
+
+               /* check if shm_destroy() is tearing down shp */
+               if (shm_file == NULL) {
+                       err = -EIDRM;
+                       goto out_unlock0;
+               }
+
                if (is_file_hugepages(shm_file))
                        goto out_unlock0;
 
@@ -1101,6 +1114,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
                goto out_unlock;
 
        ipc_lock_object(&shp->shm_perm);
+
+       /* check if shm_destroy() is tearing down shp */
+       if (shp->shm_file == NULL) {
+               ipc_unlock_object(&shp->shm_perm);
+               err = -EIDRM;
+               goto out_unlock;
+       }
+
        path = shp->shm_file->f_path;
        path_get(&path);
        shp->shm_nattch++;
index 09a9c94f42bde841a58b875ca7fc75c1b69f65c6..bbaf7d59c1bb14f166441e6532b55585790e4b52 100644 (file)
@@ -41,8 +41,9 @@ ifneq ($(CONFIG_SMP),y)
 obj-y += up.o
 endif
 obj-$(CONFIG_UID16) += uid16.o
+obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
 obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
@@ -122,19 +123,52 @@ targets += timeconst.h
 $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
        $(call if_changed,bc)
 
-ifeq ($(CONFIG_MODULE_SIG),y)
+###############################################################################
+#
+# Roll all the X.509 certificates that we can find together and pull them into
+# the kernel so that they get loaded into the system trusted keyring during
+# boot.
 #
-# Pull the signing certificate and any extra certificates into the kernel
+# We look in the source root and the build root for all files whose name ends
+# in ".x509".  Unfortunately, this will generate duplicate filenames, so we
+# have make canonicalise the pathnames and then sort them to discard the
+# duplicates.
 #
+###############################################################################
+ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y)
+X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509)
+X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509
+X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \
+                               $(or $(realpath $(CERT)),$(CERT))))
+
+ifeq ($(X509_CERTIFICATES),)
+$(warning *** No X.509 certificates found ***)
+endif
+
+ifneq ($(wildcard $(obj)/.x509.list),)
+ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES))
+$(info X.509 certificate list changed)
+$(shell rm $(obj)/.x509.list)
+endif
+endif
+
+kernel/system_certificates.o: $(obj)/x509_certificate_list
 
-quiet_cmd_touch = TOUCH   $@
-      cmd_touch = touch   $@
+quiet_cmd_x509certs  = CERTS   $@
+      cmd_x509certs  = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo "  - Including cert $(X509)")
 
-extra_certificates:
-       $(call cmd,touch)
+targets += $(obj)/x509_certificate_list
+$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list
+       $(call if_changed,x509certs)
 
-kernel/modsign_certificate.o: signing_key.x509 extra_certificates
+targets += $(obj)/.x509.list
+$(obj)/.x509.list:
+       @echo $(X509_CERTIFICATES) >$@
 
+clean-files := x509_certificate_list .x509.list
+endif
+
+ifeq ($(CONFIG_MODULE_SIG),y)
 ###############################################################################
 #
 # If module signing is requested, say by allyesconfig, but a key has not been
index 7b0e23a740ce345987c33f9e012302c24de0f4db..906ae5a0233a1011d558ff47c548808517ef9a03 100644 (file)
@@ -60,7 +60,6 @@
 #ifdef CONFIG_SECURITY
 #include <linux/security.h>
 #endif
-#include <net/netlink.h>
 #include <linux/freezer.h>
 #include <linux/tty.h>
 #include <linux/pid_namespace.h>
@@ -140,6 +139,17 @@ static struct task_struct *kauditd_task;
 static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
 static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
 
+/*
+ * Current audit feature state: sent to userspace by audit_get_feature()
+ * and modified by audit_set_feature().  Starts with no feature enabled
+ * and no feature locked.
+ */
+static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
+                                  .mask = -1,
+                                  .features = 0,
+                                  .lock = 0,};
+
+/*
+ * Names used in AUDIT_FEATURE_CHANGE records, indexed by feature number.
+ * audit_set_feature() has a BUILD_BUG_ON() checking that this array covers
+ * every feature up to AUDIT_LAST_FEATURE.
+ */
+static char *audit_feature_names[2] = {
+       "only_unset_loginuid",
+       "loginuid_immutable",
+};
+
+
 /* Serialize requests from userspace. */
 DEFINE_MUTEX(audit_cmd_mutex);
 
@@ -584,6 +594,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
                return -EOPNOTSUPP;
        case AUDIT_GET:
        case AUDIT_SET:
+       case AUDIT_GET_FEATURE:
+       case AUDIT_SET_FEATURE:
        case AUDIT_LIST_RULES:
        case AUDIT_ADD_RULE:
        case AUDIT_DEL_RULE:
@@ -613,7 +625,7 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
        int rc = 0;
        uid_t uid = from_kuid(&init_user_ns, current_uid());
 
-       if (!audit_enabled) {
+       if (!audit_enabled && msg_type != AUDIT_USER_AVC) {
                *ab = NULL;
                return rc;
        }
@@ -628,6 +640,94 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
        return rc;
 }
 
+/* Return non-zero if the bit for feature number @i is set in af.features. */
+int is_audit_feature_set(int i)
+{
+       u32 bit = AUDIT_FEATURE_TO_MASK(i);
+
+       return af.features & bit;
+}
+
+
+/*
+ * Answer an AUDIT_GET_FEATURE request: send the current feature state (af)
+ * back to the requesting port, reusing the request's sequence number.
+ * Always returns 0.
+ */
+static int audit_get_feature(struct sk_buff *skb)
+{
+       u32 seq = nlmsg_hdr(skb)->nlmsg_seq;
+
+       /* label the reply with the request's own type, not AUDIT_GET */
+       audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET_FEATURE, 0, 0,
+                        &af, sizeof(af));
+
+       return 0;
+}
+
+/*
+ * Emit an AUDIT_FEATURE_CHANGE record for feature number @which.
+ *
+ * @old_feature/@new_feature and @old_lock/@new_lock are masked bit values;
+ * each is collapsed to 0 or 1 for the record.  @res is logged as given
+ * (callers pass 1 for an applied change, 0 for a rejected one).
+ */
+static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature,
+                                    u32 old_lock, u32 new_lock, int res)
+{
+       struct audit_buffer *ab;
+
+       ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+       /* arguments follow the field order of the format: new= before old= */
+       audit_log_format(ab, "feature=%s new=%d old=%d old_lock=%d new_lock=%d res=%d",
+                        audit_feature_names[which], !!new_feature, !!old_feature,
+                        !!old_lock, !!new_lock, res);
+       audit_log_end(ab);
+}
+
+/*
+ * Apply an AUDIT_SET_FEATURE request carried in @skb.
+ *
+ * Only features whose bit is set in the request's mask are looked at.
+ * A first pass rejects the whole request with -EPERM (after logging the
+ * refused change) if it would alter a feature that is already locked.  A
+ * second pass then stores the new feature values, ORs in the requested
+ * lock bits and logs every feature whose value actually changes.
+ *
+ * Returns 0 on success, -EINVAL if the payload is shorter than
+ * struct audit_features, or -EPERM as described above.
+ */
+static int audit_set_feature(struct sk_buff *skb)
+{
+       struct audit_features *uaf;
+       int i;
+
+       BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > sizeof(audit_feature_names)/sizeof(audit_feature_names[0]));
+
+       /* do not read feature words beyond the end of a short message */
+       if (nlmsg_len(nlmsg_hdr(skb)) < sizeof(*uaf))
+               return -EINVAL;
+       uaf = nlmsg_data(nlmsg_hdr(skb));
+
+       /* if there is ever a version 2 we should handle that here */
+
+       for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+               u32 feature = AUDIT_FEATURE_TO_MASK(i);
+               u32 old_feature, new_feature, old_lock, new_lock;
+
+               /* if we are not changing this feature, move along */
+               if (!(feature & uaf->mask))
+                       continue;
+
+               old_feature = af.features & feature;
+               new_feature = uaf->features & feature;
+               new_lock = (uaf->lock | af.lock) & feature;
+               old_lock = af.lock & feature;
+
+               /* are we changing a locked feature? */
+               if ((af.lock & feature) && (new_feature != old_feature)) {
+                       audit_log_feature_change(i, old_feature, new_feature,
+                                                old_lock, new_lock, 0);
+                       return -EPERM;
+               }
+       }
+       /* nothing invalid, do the changes */
+       for (i = 0; i <= AUDIT_LAST_FEATURE; i++) {
+               u32 feature = AUDIT_FEATURE_TO_MASK(i);
+               u32 old_feature, new_feature, old_lock, new_lock;
+
+               /* if we are not changing this feature, move along */
+               if (!(feature & uaf->mask))
+                       continue;
+
+               old_feature = af.features & feature;
+               new_feature = uaf->features & feature;
+               old_lock = af.lock & feature;
+               new_lock = (uaf->lock | af.lock) & feature;
+
+               if (new_feature != old_feature)
+                       audit_log_feature_change(i, old_feature, new_feature,
+                                                old_lock, new_lock, 1);
+
+               if (new_feature)
+                       af.features |= feature;
+               else
+                       af.features &= ~feature;
+               /* locks are only ever added, never cleared */
+               af.lock |= new_lock;
+       }
+
+       return 0;
+}
+
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
        u32                     seq;
@@ -659,6 +759,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
        switch (msg_type) {
        case AUDIT_GET:
+               memset(&status_set, 0, sizeof(status_set));
                status_set.enabled       = audit_enabled;
                status_set.failure       = audit_failure;
                status_set.pid           = audit_pid;
@@ -670,7 +771,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                                 &status_set, sizeof(status_set));
                break;
        case AUDIT_SET:
-               if (nlh->nlmsg_len < sizeof(struct audit_status))
+               if (nlmsg_len(nlh) < sizeof(struct audit_status))
                        return -EINVAL;
                status_get   = (struct audit_status *)data;
                if (status_get->mask & AUDIT_STATUS_ENABLED) {
@@ -699,6 +800,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
                        err = audit_set_backlog_limit(status_get->backlog_limit);
                break;
+       case AUDIT_GET_FEATURE:
+               err = audit_get_feature(skb);
+               if (err)
+                       return err;
+               break;
+       case AUDIT_SET_FEATURE:
+               err = audit_set_feature(skb);
+               if (err)
+                       return err;
+               break;
        case AUDIT_USER:
        case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
        case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
@@ -715,7 +826,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                        }
                        audit_log_common_recv_msg(&ab, msg_type);
                        if (msg_type != AUDIT_USER_TTY)
-                               audit_log_format(ab, " msg='%.1024s'",
+                               audit_log_format(ab, " msg='%.*s'",
+                                                AUDIT_MESSAGE_TEXT_MAX,
                                                 (char *)data);
                        else {
                                int size;
@@ -818,7 +930,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                struct task_struct *tsk = current;
 
                spin_lock(&tsk->sighand->siglock);
-               s.enabled = tsk->signal->audit_tty != 0;
+               s.enabled = tsk->signal->audit_tty;
                s.log_passwd = tsk->signal->audit_tty_log_passwd;
                spin_unlock(&tsk->sighand->siglock);
 
@@ -832,7 +944,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
                memset(&s, 0, sizeof(s));
                /* guard against past and future API changes */
-               memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len));
+               memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
                if ((s.enabled != 0 && s.enabled != 1) ||
                    (s.log_passwd != 0 && s.log_passwd != 1))
                        return -EINVAL;
@@ -1067,13 +1179,6 @@ static void wait_for_auditd(unsigned long sleep_time)
        remove_wait_queue(&audit_backlog_wait, &wait);
 }
 
-/* Obtain an audit buffer.  This routine does locking to obtain the
- * audit buffer, but then no locking is required for calls to
- * audit_log_*format.  If the tsk is a task that is currently in a
- * syscall, then the syscall is marked as auditable and an audit record
- * will be written at syscall exit.  If there is no associated task, tsk
- * should be NULL. */
-
 /**
  * audit_log_start - obtain an audit buffer
  * @ctx: audit_context (may be NULL)
@@ -1389,7 +1494,7 @@ void audit_log_session_info(struct audit_buffer *ab)
        u32 sessionid = audit_get_sessionid(current);
        uid_t auid = from_kuid(&init_user_ns, audit_get_loginuid(current));
 
-       audit_log_format(ab, " auid=%u ses=%u\n", auid, sessionid);
+       audit_log_format(ab, " auid=%u ses=%u", auid, sessionid);
 }
 
 void audit_log_key(struct audit_buffer *ab, char *key)
@@ -1536,6 +1641,26 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
                }
        }
 
+       /* log the audit_names record type */
+       audit_log_format(ab, " nametype=");
+       switch(n->type) {
+       case AUDIT_TYPE_NORMAL:
+               audit_log_format(ab, "NORMAL");
+               break;
+       case AUDIT_TYPE_PARENT:
+               audit_log_format(ab, "PARENT");
+               break;
+       case AUDIT_TYPE_CHILD_DELETE:
+               audit_log_format(ab, "DELETE");
+               break;
+       case AUDIT_TYPE_CHILD_CREATE:
+               audit_log_format(ab, "CREATE");
+               break;
+       default:
+               audit_log_format(ab, "UNKNOWN");
+               break;
+       }
+
        audit_log_fcaps(ab, n);
        audit_log_end(ab);
 }
index 123c9b7c39795975e2ce18cf913aad6b1fbe5b85..b779642b29af9401fd15246816c376ec86e4fcab 100644 (file)
@@ -197,6 +197,9 @@ struct audit_context {
                        int                     fd;
                        int                     flags;
                } mmap;
+               struct {
+                       int                     argc;
+               } execve;
        };
        int fds[2];
 
index f7aee8be7fb286db4a40919bc10ede18f17016dc..51f3fd4c1ed3a71dd1b8cd95e6645221150f5047 100644 (file)
@@ -343,6 +343,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
        case AUDIT_DEVMINOR:
        case AUDIT_EXIT:
        case AUDIT_SUCCESS:
+       case AUDIT_INODE:
                /* bit ops are only useful on syscall args */
                if (f->op == Audit_bitmask || f->op == Audit_bittest)
                        return -EINVAL;
@@ -423,7 +424,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
                f->lsm_rule = NULL;
 
                /* Support legacy tests for a valid loginuid */
-               if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) {
+               if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
                        f->type = AUDIT_LOGINUID_SET;
                        f->val = 0;
                }
index 9845cb32b60a77c8f6e3a7f1e94c8aec9dc599b2..90594c9f755213232e5282899c8547cd5c61c823 100644 (file)
@@ -95,13 +95,6 @@ struct audit_aux_data {
 /* Number of target pids per aux struct. */
 #define AUDIT_AUX_PIDS 16
 
-struct audit_aux_data_execve {
-       struct audit_aux_data   d;
-       int argc;
-       int envc;
-       struct mm_struct *mm;
-};
-
 struct audit_aux_data_pids {
        struct audit_aux_data   d;
        pid_t                   target_pid[AUDIT_AUX_PIDS];
@@ -121,12 +114,6 @@ struct audit_aux_data_bprm_fcaps {
        struct audit_cap_data   new_pcap;
 };
 
-struct audit_aux_data_capset {
-       struct audit_aux_data   d;
-       pid_t                   pid;
-       struct audit_cap_data   cap;
-};
-
 struct audit_tree_refs {
        struct audit_tree_refs *next;
        struct audit_chunk *c[31];
@@ -566,7 +553,7 @@ static int audit_filter_rules(struct task_struct *tsk,
                        break;
                case AUDIT_INODE:
                        if (name)
-                               result = (name->ino == f->val);
+                               result = audit_comparator(name->ino, f->op, f->val);
                        else if (ctx) {
                                list_for_each_entry(n, &ctx->names_list, list) {
                                        if (audit_comparator(n->ino, f->op, f->val)) {
@@ -943,8 +930,10 @@ int audit_alloc(struct task_struct *tsk)
                return 0; /* Return if not auditing. */
 
        state = audit_filter_task(tsk, &key);
-       if (state == AUDIT_DISABLED)
+       if (state == AUDIT_DISABLED) {
+               clear_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
                return 0;
+       }
 
        if (!(context = audit_alloc_context(state))) {
                kfree(key);
@@ -1149,20 +1138,16 @@ static int audit_log_single_execve_arg(struct audit_context *context,
 }
 
 static void audit_log_execve_info(struct audit_context *context,
-                                 struct audit_buffer **ab,
-                                 struct audit_aux_data_execve *axi)
+                                 struct audit_buffer **ab)
 {
        int i, len;
        size_t len_sent = 0;
        const char __user *p;
        char *buf;
 
-       if (axi->mm != current->mm)
-               return; /* execve failed, no additional info */
-
-       p = (const char __user *)axi->mm->arg_start;
+       p = (const char __user *)current->mm->arg_start;
 
-       audit_log_format(*ab, "argc=%d", axi->argc);
+       audit_log_format(*ab, "argc=%d", context->execve.argc);
 
        /*
         * we need some kernel buffer to hold the userspace args.  Just
@@ -1176,7 +1161,7 @@ static void audit_log_execve_info(struct audit_context *context,
                return;
        }
 
-       for (i = 0; i < axi->argc; i++) {
+       for (i = 0; i < context->execve.argc; i++) {
                len = audit_log_single_execve_arg(context, ab, i,
                                                  &len_sent, p, buf);
                if (len <= 0)
@@ -1279,6 +1264,9 @@ static void show_special(struct audit_context *context, int *call_panic)
                audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
                                 context->mmap.flags);
                break; }
+       case AUDIT_EXECVE: {
+               audit_log_execve_info(context, &ab);
+               break; }
        }
        audit_log_end(ab);
 }
@@ -1325,11 +1313,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
                switch (aux->type) {
 
-               case AUDIT_EXECVE: {
-                       struct audit_aux_data_execve *axi = (void *)aux;
-                       audit_log_execve_info(context, &ab, axi);
-                       break; }
-
                case AUDIT_BPRM_FCAPS: {
                        struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
                        audit_log_format(ab, "fver=%x", axs->fcap_ver);
@@ -1964,6 +1947,43 @@ int auditsc_get_stamp(struct audit_context *ctx,
 /* global counter which is incremented every time something logs in */
 static atomic_t session_id = ATOMIC_INIT(0);
 
+/*
+ * Decide whether the current task may change its loginuid to @loginuid.
+ * Returns 0 when the change is permitted and -EPERM otherwise.  The checks
+ * are ordered so that capable() is only consulted once the loginuid is
+ * already set and the immutable feature has not vetoed the change.
+ */
+static int audit_set_loginuid_perm(kuid_t loginuid)
+{
+       /* if we are unset, we don't need privs */
+       if (!audit_loginuid_set(current))
+               return 0;
+       /* AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change */
+       if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
+               return -EPERM;
+       /* it is set, you need permission */
+       if (!capable(CAP_AUDIT_CONTROL))
+               return -EPERM;
+       /* reject if this is not an unset and we don't allow that */
+       if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
+               return -EPERM;
+       return 0;
+}
+
+/*
+ * Emit an AUDIT_LOGIN record describing an attempted loginuid/sessionid
+ * change by the current task.
+ *
+ * @koldloginuid/@kloginuid: loginuid before and after the attempt
+ * @oldsessionid/@sessionid: session id before and after the attempt
+ * @rc: result of the attempt; logged as res=1 when @rc is 0, res=0 otherwise
+ *
+ * The kuids are translated into init_user_ns before being logged.  If no
+ * audit buffer can be obtained the record is silently dropped.
+ */
+static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
+                                  unsigned int oldsessionid, unsigned int sessionid,
+                                  int rc)
+{
+       struct audit_buffer *ab;
+       uid_t uid, ologinuid, nloginuid;
+
+       uid = from_kuid(&init_user_ns, task_uid(current));
+       ologinuid = from_kuid(&init_user_ns, koldloginuid);
+       /* terminated with ';' (was ','), keeping this a statement of its own */
+       nloginuid = from_kuid(&init_user_ns, kloginuid);
+
+       ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+       if (!ab)
+               return;
+       audit_log_format(ab, "pid=%d uid=%u old auid=%u new auid=%u old "
+                        "ses=%u new ses=%u res=%d", current->pid, uid, ologinuid,
+                        nloginuid, oldsessionid, sessionid, !rc);
+       audit_log_end(ab);
+}
+
 /**
  * audit_set_loginuid - set current task's audit_context loginuid
  * @loginuid: loginuid value
@@ -1975,37 +1995,26 @@ static atomic_t session_id = ATOMIC_INIT(0);
 int audit_set_loginuid(kuid_t loginuid)
 {
        struct task_struct *task = current;
-       struct audit_context *context = task->audit_context;
-       unsigned int sessionid;
+       unsigned int oldsessionid, sessionid = (unsigned int)-1;
+       kuid_t oldloginuid;
+       int rc;
 
-#ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE
-       if (audit_loginuid_set(task))
-               return -EPERM;
-#else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
-       if (!capable(CAP_AUDIT_CONTROL))
-               return -EPERM;
-#endif  /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
+       oldloginuid = audit_get_loginuid(current);
+       oldsessionid = audit_get_sessionid(current);
 
-       sessionid = atomic_inc_return(&session_id);
-       if (context && context->in_syscall) {
-               struct audit_buffer *ab;
+       rc = audit_set_loginuid_perm(loginuid);
+       if (rc)
+               goto out;
+
+       /* are we setting or clearing? */
+       if (uid_valid(loginuid))
+               sessionid = atomic_inc_return(&session_id);
 
-               ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
-               if (ab) {
-                       audit_log_format(ab, "login pid=%d uid=%u "
-                               "old auid=%u new auid=%u"
-                               " old ses=%u new ses=%u",
-                               task->pid,
-                               from_kuid(&init_user_ns, task_uid(task)),
-                               from_kuid(&init_user_ns, task->loginuid),
-                               from_kuid(&init_user_ns, loginuid),
-                               task->sessionid, sessionid);
-                       audit_log_end(ab);
-               }
-       }
        task->sessionid = sessionid;
        task->loginuid = loginuid;
-       return 0;
+out:
+       audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
+       return rc;
 }
 
 /**
@@ -2126,22 +2135,12 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
        context->ipc.has_perm = 1;
 }
 
-int __audit_bprm(struct linux_binprm *bprm)
+void __audit_bprm(struct linux_binprm *bprm)
 {
-       struct audit_aux_data_execve *ax;
        struct audit_context *context = current->audit_context;
 
-       ax = kmalloc(sizeof(*ax), GFP_KERNEL);
-       if (!ax)
-               return -ENOMEM;
-
-       ax->argc = bprm->argc;
-       ax->envc = bprm->envc;
-       ax->mm = bprm->mm;
-       ax->d.type = AUDIT_EXECVE;
-       ax->d.next = context->aux;
-       context->aux = (void *)ax;
-       return 0;
+       context->type = AUDIT_EXECVE;
+       context->execve.argc = bprm->argc;
 }
 
 
index e0839bcd48c8c2fdce9c13a3bfeba35a8e5f0e75..4c62513fe19fc8c4b938aeb8b3acb5158da2f976 100644 (file)
@@ -895,11 +895,6 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
        iput(inode);
 }
 
-static int cgroup_delete(const struct dentry *d)
-{
-       return 1;
-}
-
 static void remove_dir(struct dentry *d)
 {
        struct dentry *parent = dget(d->d_parent);
@@ -1486,7 +1481,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
 {
        static const struct dentry_operations cgroup_dops = {
                .d_iput = cgroup_diput,
-               .d_delete = cgroup_delete,
+               .d_delete = always_delete_dentry,
        };
 
        struct inode *inode =
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S
deleted file mode 100644 (file)
index 4a9a86d..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <linux/export.h>
-
-#define GLOBAL(name)   \
-       .globl VMLINUX_SYMBOL(name);    \
-       VMLINUX_SYMBOL(name):
-
-       .section ".init.data","aw"
-
-GLOBAL(modsign_certificate_list)
-       .incbin "signing_key.x509"
-       .incbin "extra_certificates"
-GLOBAL(modsign_certificate_list_end)
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
deleted file mode 100644 (file)
index 7cbd450..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Public keys for module signature verification
- *
- * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/cred.h>
-#include <linux/err.h>
-#include <keys/asymmetric-type.h>
-#include "module-internal.h"
-
-struct key *modsign_keyring;
-
-extern __initconst const u8 modsign_certificate_list[];
-extern __initconst const u8 modsign_certificate_list_end[];
-
-/*
- * We need to make sure ccache doesn't cache the .o file as it doesn't notice
- * if modsign.pub changes.
- */
-static __initconst const char annoy_ccache[] = __TIME__ "foo";
-
-/*
- * Load the compiled-in keys
- */
-static __init int module_verify_init(void)
-{
-       pr_notice("Initialise module verification\n");
-
-       modsign_keyring = keyring_alloc(".module_sign",
-                                       KUIDT_INIT(0), KGIDT_INIT(0),
-                                       current_cred(),
-                                       ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                                        KEY_USR_VIEW | KEY_USR_READ),
-                                       KEY_ALLOC_NOT_IN_QUOTA, NULL);
-       if (IS_ERR(modsign_keyring))
-               panic("Can't allocate module signing keyring\n");
-
-       return 0;
-}
-
-/*
- * Must be initialised before we try and load the keys into the keyring.
- */
-device_initcall(module_verify_init);
-
-/*
- * Load the compiled-in keys
- */
-static __init int load_module_signing_keys(void)
-{
-       key_ref_t key;
-       const u8 *p, *end;
-       size_t plen;
-
-       pr_notice("Loading module verification certificates\n");
-
-       end = modsign_certificate_list_end;
-       p = modsign_certificate_list;
-       while (p < end) {
-               /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
-                * than 256 bytes in size.
-                */
-               if (end - p < 4)
-                       goto dodgy_cert;
-               if (p[0] != 0x30 &&
-                   p[1] != 0x82)
-                       goto dodgy_cert;
-               plen = (p[2] << 8) | p[3];
-               plen += 4;
-               if (plen > end - p)
-                       goto dodgy_cert;
-
-               key = key_create_or_update(make_key_ref(modsign_keyring, 1),
-                                          "asymmetric",
-                                          NULL,
-                                          p,
-                                          plen,
-                                          (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                                          KEY_USR_VIEW,
-                                          KEY_ALLOC_NOT_IN_QUOTA);
-               if (IS_ERR(key))
-                       pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n",
-                              PTR_ERR(key));
-               else
-                       pr_notice("MODSIGN: Loaded cert '%s'\n",
-                                 key_ref_to_ptr(key)->description);
-               p += plen;
-       }
-
-       return 0;
-
-dodgy_cert:
-       pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n");
-       return 0;
-}
-late_initcall(load_module_signing_keys);
index 24f9247b7d0214d4b3755bb4396f14d9ef712753..915e123a430fbb0cb6ea9197b90ab6ec3684a191 100644 (file)
@@ -9,6 +9,4 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
-extern struct key *modsign_keyring;
-
 extern int mod_verify_sig(const void *mod, unsigned long *_modlen);
index f2970bddc5ea6224b8c0357970a543c90ac11da0..be5b8fac4bd0de72aba1f91674a2d0eb7a296d31 100644 (file)
@@ -14,6 +14,7 @@
 #include <crypto/public_key.h>
 #include <crypto/hash.h>
 #include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
 #include "module-internal.h"
 
 /*
@@ -28,7 +29,7 @@
  */
 struct module_signature {
        u8      algo;           /* Public-key crypto algorithm [enum pkey_algo] */
-       u8      hash;           /* Digest algorithm [enum pkey_hash_algo] */
+       u8      hash;           /* Digest algorithm [enum hash_algo] */
        u8      id_type;        /* Key identifier type [enum pkey_id_type] */
        u8      signer_len;     /* Length of signer's name */
        u8      key_id_len;     /* Length of key identifier */
@@ -39,7 +40,7 @@ struct module_signature {
 /*
  * Digest the module contents.
  */
-static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
+static struct public_key_signature *mod_make_digest(enum hash_algo hash,
                                                    const void *mod,
                                                    unsigned long modlen)
 {
@@ -54,7 +55,7 @@ static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
        /* Allocate the hashing algorithm we're going to need and find out how
         * big the hash operational data will be.
         */
-       tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0);
+       tfm = crypto_alloc_shash(hash_algo_name[hash], 0, 0);
        if (IS_ERR(tfm))
                return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm);
 
@@ -157,7 +158,7 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
 
        pr_debug("Look up: \"%s\"\n", id);
 
-       key = keyring_search(make_key_ref(modsign_keyring, 1),
+       key = keyring_search(make_key_ref(system_trusted_keyring, 1),
                             &key_type_asymmetric, id);
        if (IS_ERR(key))
                pr_warn("Request for unknown module key '%s' err %ld\n",
@@ -217,7 +218,7 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen)
                return -ENOPKG;
 
        if (ms.hash >= PKEY_HASH__LAST ||
-           !pkey_hash_algo[ms.hash])
+           !hash_algo_name[ms.hash])
                return -ENOPKG;
 
        key = request_asymmetric_key(sig, ms.signer_len,
index 10c22cae83a035e43eb54a79272fd1e6ae2fc757..b38109e204aff8e83afb0e15484cdfec8e450575 100644 (file)
@@ -792,7 +792,8 @@ void free_basic_memory_bitmaps(void)
 {
        struct memory_bitmap *bm1, *bm2;
 
-       BUG_ON(!(forbidden_pages_map && free_pages_map));
+       if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
+               return;
 
        bm1 = forbidden_pages_map;
        bm2 = free_pages_map;
index 24850270c8024948d60c0f827457b18d2de2d9a7..98d357584cd6bad7bdc87cbc63e5c296a8393b60 100644 (file)
@@ -70,6 +70,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
                data->swap = swsusp_resume_device ?
                        swap_type_of(swsusp_resume_device, 0, NULL) : -1;
                data->mode = O_RDONLY;
+               data->free_bitmaps = false;
                error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
                if (error)
                        pm_notifier_call_chain(PM_POST_HIBERNATION);
diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S
new file mode 100644 (file)
index 0000000..4aef390
--- /dev/null
@@ -0,0 +1,10 @@
+#include <linux/export.h>
+#include <linux/init.h>
+
+       __INITRODATA
+
+       .globl VMLINUX_SYMBOL(system_certificate_list)
+VMLINUX_SYMBOL(system_certificate_list):
+       .incbin "kernel/x509_certificate_list"
+       .globl VMLINUX_SYMBOL(system_certificate_list_end)
+VMLINUX_SYMBOL(system_certificate_list_end):
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c
new file mode 100644 (file)
index 0000000..564dd93
--- /dev/null
@@ -0,0 +1,105 @@
+/* System trusted keyring for trusted public keys
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
+#include "module-internal.h"
+
+struct key *system_trusted_keyring;
+EXPORT_SYMBOL_GPL(system_trusted_keyring);
+
+extern __initconst const u8 system_certificate_list[];
+extern __initconst const u8 system_certificate_list_end[];
+
+/*
+ * Allocate the ".system_keyring" keyring and set KEY_FLAG_TRUSTED_ONLY on
+ * it.  Panics if the keyring cannot be allocated; always returns 0.
+ */
+static __init int system_trusted_keyring_init(void)
+{
+       pr_notice("Initialise system trusted keyring\n");
+
+       system_trusted_keyring =
+               keyring_alloc(".system_keyring",
+                             KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+                             ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                             KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+                             KEY_ALLOC_NOT_IN_QUOTA, NULL);
+       if (IS_ERR(system_trusted_keyring))
+               panic("Can't allocate system trusted keyring\n");
+
+       set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags);
+       return 0;
+}
+
+/*
+ * Must be initialised before we try and load the keys into the keyring.
+ */
+device_initcall(system_trusted_keyring_init);
+
+/*
+ * Load the compiled-in list of X.509 certificates.
+ *
+ * Walks the blob delimited by system_certificate_list and
+ * system_certificate_list_end, treating it as a sequence of back-to-back
+ * certificates, and adds each one to system_trusted_keyring.  A certificate
+ * that fails to load is reported and skipped; a malformed header aborts the
+ * walk.  Always returns 0.
+ */
+static __init int load_system_certificate_list(void)
+{
+       key_ref_t key;
+       const u8 *p, *end;
+       size_t plen;
+
+       pr_notice("Loading compiled-in X.509 certificates\n");
+
+       end = system_certificate_list_end;
+       p = system_certificate_list;
+       while (p < end) {
+               /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+                * than 256 bytes in size.
+                */
+               if (end - p < 4)
+                       goto dodgy_cert;
+               /* Both header bytes must match: 0x30 is the SEQUENCE tag and
+                * 0x82 says a two-byte length follows, which is what the
+                * p[2]/p[3] decode below relies on.
+                */
+               if (p[0] != 0x30 ||
+                   p[1] != 0x82)
+                       goto dodgy_cert;
+               plen = (p[2] << 8) | p[3];
+               plen += 4;      /* account for the 4-byte tag + length header */
+               if (plen > end - p)
+                       goto dodgy_cert;
+
+               key = key_create_or_update(make_key_ref(system_trusted_keyring, 1),
+                                          "asymmetric",
+                                          NULL,
+                                          p,
+                                          plen,
+                                          ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                          KEY_USR_VIEW | KEY_USR_READ),
+                                          KEY_ALLOC_NOT_IN_QUOTA |
+                                          KEY_ALLOC_TRUSTED);
+               if (IS_ERR(key)) {
+                       pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+                              PTR_ERR(key));
+               } else {
+                       pr_notice("Loaded X.509 cert '%s'\n",
+                                 key_ref_to_ptr(key)->description);
+                       key_ref_put(key);
+               }
+               p += plen;
+       }
+
+       return 0;
+
+dodgy_cert:
+       pr_err("Problem parsing in-kernel X.509 certificate list\n");
+       return 0;
+}
+late_initcall(load_system_certificate_list);
index 5bbb91988e69278f2cd012896db29688c9234476..a3a0dbfda32957616f143ae2722541a5846c0a62 100644 (file)
@@ -51,6 +51,10 @@ struct user_namespace init_user_ns = {
        .owner = GLOBAL_ROOT_UID,
        .group = GLOBAL_ROOT_GID,
        .proc_inum = PROC_USER_INIT_INO,
+#ifdef CONFIG_KEYS_KERBEROS_CACHE
+       .krb_cache_register_sem =
+       __RWSEM_INITIALIZER(init_user_ns.krb_cache_register_sem),
+#endif
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
index 13fb1134ba582e49c8aa3643feada72a2b0dae8b..240fb62cf3945aa0f7b601b343db65312a42f345 100644 (file)
@@ -101,6 +101,9 @@ int create_user_ns(struct cred *new)
 
        set_cred_user_ns(new, ns);
 
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       init_rwsem(&ns->persistent_keyring_register_sem);
+#endif
        return 0;
 }
 
@@ -130,6 +133,9 @@ void free_user_ns(struct user_namespace *ns)
 
        do {
                parent = ns->parent;
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+               key_put(ns->persistent_keyring_register);
+#endif
                proc_free_inum(ns->proc_inum);
                kmem_cache_free(user_ns_cachep, ns);
                ns = parent;
index 06dc74200a5159c5b5458c36e2b71297e638a194..991c98bc4a3f51e9e7f377274084bec909483ea9 100644 (file)
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
 config BTREE
        boolean
 
+config ASSOCIATIVE_ARRAY
+       bool
+       help
+         Generic associative array.  Can be searched and iterated over whilst
+         it is being modified.  It is also reasonably quick to search and
+         modify.  The algorithms are non-recursive, and the trees are highly
+         capacious.
+
+         See:
+
+               Documentation/assoc_array.txt
+
+         for more information.
+
 config HAS_IOMEM
        boolean
        depends on !NO_IOMEM
index d480a8c9238562b144e59217cdf4ecc05b574556..b46065fd67a4741dadeabe1ced0e67ed6269b741 100644 (file)
@@ -47,6 +47,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
new file mode 100644 (file)
index 0000000..17edeaf
--- /dev/null
@@ -0,0 +1,1746 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+//#define DEBUG
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/assoc_array_priv.h>
+
+/*
+ * Iterate over an associative array.  The caller must hold the RCU read lock
+ * or better.
+ *
+ * The walk starts at @root and passes every leaf it finds to @iterator along
+ * with @iterator_data.  It ends, returning 0, when ascending reaches a back
+ * pointer equal to @stop.  If @iterator returns non-zero the walk is
+ * abandoned immediately and that value is returned.
+ */
+static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
+                                      const struct assoc_array_ptr *stop,
+                                      int (*iterator)(const void *leaf,
+                                                      void *iterator_data),
+                                      void *iterator_data)
+{
+       const struct assoc_array_shortcut *shortcut;
+       const struct assoc_array_node *node;
+       const struct assoc_array_ptr *cursor, *ptr, *parent;
+       unsigned long has_meta;
+       int slot, ret;
+
+       cursor = root;
+
+begin_node:
+       if (assoc_array_ptr_is_shortcut(cursor)) {
+               /* Descend through a shortcut */
+               shortcut = assoc_array_ptr_to_shortcut(cursor);
+               smp_read_barrier_depends();
+               cursor = ACCESS_ONCE(shortcut->next_node);
+       }
+
+       node = assoc_array_ptr_to_node(cursor);
+       smp_read_barrier_depends();
+       slot = 0;
+
+       /* We perform two passes of each node.
+        *
+        * The first pass does all the leaves in this node.  This means we
+        * don't miss any leaves if the node is split up by insertion whilst
+        * we're iterating over the branches rooted here (we may, however, see
+        * some leaves twice).
+        */
+       has_meta = 0;
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+               /* OR together the raw pointer values so that a single test of
+                * the type bit after the loop tells us whether the second
+                * pass can be skipped.
+                */
+               has_meta |= (unsigned long)ptr;
+               if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+                       /* We need a barrier between the read of the pointer
+                        * and dereferencing the pointer - but only if we are
+                        * actually going to dereference it.
+                        */
+                       smp_read_barrier_depends();
+
+                       /* Invoke the callback */
+                       ret = iterator(assoc_array_ptr_to_leaf(ptr),
+                                      iterator_data);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       /* The second pass attends to all the metadata pointers.  If we follow
+        * one of these we may find that we don't come back here, but rather go
+        * back to a replacement node with the leaves in a different layout.
+        *
+        * We are guaranteed to make progress, however, as the slot number for
+        * a particular portion of the key space cannot change - and we
+        * continue at the back pointer + 1.
+        */
+       if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
+               goto finished_node;
+       slot = 0;
+
+continue_node:
+       /* Also entered when ascending from a child, in which case cursor is
+        * the parent and slot is the first slot not yet examined.
+        */
+       node = assoc_array_ptr_to_node(cursor);
+       smp_read_barrier_depends();
+
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+               if (assoc_array_ptr_is_meta(ptr)) {
+                       cursor = ptr;
+                       goto begin_node;
+               }
+       }
+
+finished_node:
+       /* Move up to the parent (may need to skip back over a shortcut) */
+       parent = ACCESS_ONCE(node->back_pointer);
+       slot = node->parent_slot;
+       if (parent == stop)
+               return 0;
+
+       if (assoc_array_ptr_is_shortcut(parent)) {
+               shortcut = assoc_array_ptr_to_shortcut(parent);
+               smp_read_barrier_depends();
+               cursor = parent;
+               parent = ACCESS_ONCE(shortcut->back_pointer);
+               slot = shortcut->parent_slot;
+               if (parent == stop)
+                       return 0;
+       }
+
+       /* Ascend to next slot in parent node */
+       cursor = parent;
+       slot++;
+       goto continue_node;
+}
+
+/**
+ * assoc_array_iterate - Pass all objects in the array to a callback
+ * @array: The array to iterate over.
+ * @iterator: The callback function.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Present every object held in an associative array to @iterator, handing
+ * @iterator_data through to each invocation.
+ *
+ * Should the array be modified while the iteration is in progress, an object
+ * may be presented to the callback more than once, though each object should
+ * still be presented at least once.  Callers that cannot tolerate repeats
+ * must exclude modification for the duration of the call.
+ *
+ * Returns 0 if the array holds no objects; otherwise returns the value
+ * produced by the last callback invocation.  A non-zero return from the
+ * callback stops the iteration at once.
+ *
+ * The caller should hold the RCU read lock or better if concurrent
+ * modification is possible.
+ */
+int assoc_array_iterate(const struct assoc_array *array,
+                       int (*iterator)(const void *object,
+                                       void *iterator_data),
+                       void *iterator_data)
+{
+       /* Sample the root exactly once; it may be replaced under us. */
+       struct assoc_array_ptr *top = ACCESS_ONCE(array->root);
+
+       return top ? assoc_array_subtree_iterate(top, NULL,
+                                                iterator, iterator_data)
+                  : 0;
+}
+
+/*
+ * Outcome of assoc_array_walk(), indicating which part (if any) of the
+ * struct assoc_array_walk_result has been filled in.
+ *
+ * Only the type is declared here: no object of this type is defined at file
+ * scope, so nothing is added to the kernel's global symbol namespace.
+ */
+enum assoc_array_walk_status {
+       assoc_array_walk_tree_empty,            /* The array has no root */
+       assoc_array_walk_found_terminal_node,   /* terminal_node is valid */
+       assoc_array_walk_found_wrong_shortcut,  /* wrong_shortcut is valid */
+};
+
+/*
+ * Where assoc_array_walk() ended up.  Which of the two members is meaningful
+ * depends on the status the walk returned.
+ */
+struct assoc_array_walk_result {
+       /* Set when the walk stops at a node whose slot for the key does not
+        * hold a node/shortcut pointer.
+        */
+       struct {
+               struct assoc_array_node *node;  /* Node in which leaf might be found */
+               int             level;  /* Key level at which the node was consulted */
+               int             slot;   /* Slot in node selected by the key */
+       } terminal_node;
+       /* Set when the walk meets a shortcut whose stored index key differs
+        * from the key being looked up.
+        */
+       struct {
+               struct assoc_array_shortcut *shortcut;  /* The mismatching shortcut */
+               int             level;          /* Key level on reaching the shortcut */
+               int             sc_level;       /* Level of the comparison that failed */
+               unsigned long   sc_segments;    /* Shortcut key word that was compared */
+               unsigned long   dissimilarity;  /* XOR of key word and sc_segments */
+       } wrong_shortcut;
+};
+
+/*
+ * Navigate through the internal tree looking for the closest node to the key.
+ *
+ * @array: The array to walk; its root pointer is sampled once at the start.
+ * @ops: Supplies get_key_chunk(), used to fetch words of @index_key.
+ * @index_key: The key being looked for.
+ * @result: Filled in according to the status returned.
+ *
+ * Returns:
+ *  - assoc_array_walk_tree_empty if the array has no root; @result is left
+ *    untouched.
+ *  - assoc_array_walk_found_terminal_node if a node was reached whose slot
+ *    for this key does not hold a node/shortcut pointer;
+ *    result->terminal_node is set.
+ *  - assoc_array_walk_found_wrong_shortcut if a shortcut on the route stores
+ *    an index key that differs from @index_key; result->wrong_shortcut is
+ *    set.
+ */
+static enum assoc_array_walk_status
+assoc_array_walk(const struct assoc_array *array,
+                const struct assoc_array_ops *ops,
+                const void *index_key,
+                struct assoc_array_walk_result *result)
+{
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *cursor, *ptr;
+       unsigned long sc_segments, dissimilarity;
+       unsigned long segments;
+       int level, sc_level, next_sc_level;
+       int slot;
+
+       pr_devel("-->%s()\n", __func__);
+
+       cursor = ACCESS_ONCE(array->root);
+       if (!cursor)
+               return assoc_array_walk_tree_empty;
+
+       level = 0;
+
+       /* Use segments from the key for the new leaf to navigate through the
+        * internal tree, skipping through nodes and shortcuts that are on
+        * route to the destination.  Eventually we'll come to a slot that is
+        * either empty or contains a leaf at which point we've found a node in
+        * which the leaf we're looking for might be found or into which it
+        * should be inserted.
+        */
+jumped:
+       /* Entered whenever level has moved into a new key word, so the word
+        * has to be (re)fetched.
+        */
+       segments = ops->get_key_chunk(index_key, level);
+       pr_devel("segments[%d]: %lx\n", level, segments);
+
+       if (assoc_array_ptr_is_shortcut(cursor))
+               goto follow_shortcut;
+
+consider_node:
+       node = assoc_array_ptr_to_node(cursor);
+       smp_read_barrier_depends();
+
+       /* Pick this level's slot number out of the current key word */
+       slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+       slot &= ASSOC_ARRAY_FAN_MASK;
+       ptr = ACCESS_ONCE(node->slots[slot]);
+
+       pr_devel("consider slot %x [ix=%d type=%lu]\n",
+                slot, level, (unsigned long)ptr & 3);
+
+       if (!assoc_array_ptr_is_meta(ptr)) {
+               /* The node doesn't have a node/shortcut pointer in the slot
+                * corresponding to the index key that we have to follow.
+                */
+               result->terminal_node.node = node;
+               result->terminal_node.level = level;
+               result->terminal_node.slot = slot;
+               pr_devel("<--%s() = terminal_node\n", __func__);
+               return assoc_array_walk_found_terminal_node;
+       }
+
+       if (assoc_array_ptr_is_node(ptr)) {
+               /* There is a pointer to a node in the slot corresponding to
+                * this index key segment, so we need to follow it.
+                */
+               cursor = ptr;
+               level += ASSOC_ARRAY_LEVEL_STEP;
+               /* The key word in hand is still good unless the step has
+                * just carried level over a chunk boundary.
+                */
+               if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
+                       goto consider_node;
+               goto jumped;
+       }
+
+       /* There is a shortcut in the slot corresponding to the index key
+        * segment.  We follow the shortcut if its partial index key matches
+        * this leaf's.  Otherwise we need to split the shortcut.
+        */
+       cursor = ptr;
+follow_shortcut:
+       shortcut = assoc_array_ptr_to_shortcut(cursor);
+       smp_read_barrier_depends();
+       pr_devel("shortcut to %d\n", shortcut->skip_to_level);
+       sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
+       BUG_ON(sc_level > shortcut->skip_to_level);
+
+       do {
+               /* Check the leaf against the shortcut's index key a word at a
+                * time, trimming the final word (the shortcut stores the index
+                * key completely from the root to the shortcut's target).
+                */
+               if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
+                       segments = ops->get_key_chunk(index_key, sc_level);
+
+               sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
+               dissimilarity = segments ^ sc_segments;
+
+               if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
+                       /* Trim segments that are beyond the shortcut */
+                       int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+                       dissimilarity &= ~(ULONG_MAX << shift);
+                       next_sc_level = shortcut->skip_to_level;
+               } else {
+                       /* Advance to the start of the next whole key word */
+                       next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
+                       next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               }
+
+               if (dissimilarity != 0) {
+                       /* This shortcut points elsewhere */
+                       result->wrong_shortcut.shortcut = shortcut;
+                       result->wrong_shortcut.level = level;
+                       result->wrong_shortcut.sc_level = sc_level;
+                       result->wrong_shortcut.sc_segments = sc_segments;
+                       result->wrong_shortcut.dissimilarity = dissimilarity;
+                       return assoc_array_walk_found_wrong_shortcut;
+               }
+
+               sc_level = next_sc_level;
+       } while (sc_level < shortcut->skip_to_level);
+
+       /* The shortcut matches the leaf's index to this point. */
+       cursor = ACCESS_ONCE(shortcut->next_node);
+       /* Go via the key word fetch only if the shortcut ended in a different
+        * key word from the one level was in on arrival.
+        */
+       if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
+               level = sc_level;
+               goto jumped;
+       } else {
+               level = sc_level;
+               goto consider_node;
+       }
+}
+
+/**
+ * assoc_array_find - Find an object by index key
+ * @array: The associative array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Walk the internal tree to the node in which an object with the given key
+ * would have to reside and then check each leaf held directly by that node.
+ * Returns the matching object, or NULL if the array does not contain one.
+ *
+ * The caller must hold the RCU read lock or better.
+ */
+void *assoc_array_find(const struct assoc_array *array,
+                      const struct assoc_array_ops *ops,
+                      const void *index_key)
+{
+       struct assoc_array_walk_result result;
+       const struct assoc_array_node *terminal;
+       const struct assoc_array_ptr *candidate;
+       const void *object;
+       int i;
+
+       /* Anything other than a terminal node means the key cannot be here */
+       if (assoc_array_walk(array, ops, index_key, &result) !=
+           assoc_array_walk_found_terminal_node)
+               return NULL;
+
+       terminal = result.terminal_node.node;
+       smp_read_barrier_depends();
+
+       /* If the target key is available to us, it has to be pointed to by
+        * the terminal node.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               candidate = ACCESS_ONCE(terminal->slots[i]);
+               if (!candidate || !assoc_array_ptr_is_leaf(candidate))
+                       continue;
+
+               /* The barrier separates the read of the pointer from the
+                * dereference of it, and is only issued when a dereference
+                * is actually going to take place.
+                */
+               object = assoc_array_ptr_to_leaf(candidate);
+               smp_read_barrier_depends();
+               if (ops->compare_object(object, index_key))
+                       return (void *)object;
+       }
+
+       return NULL;
+}
+
+/*
+ * Destructively iterate over an associative array.  The caller must prevent
+ * other simultaneous accesses.
+ *
+ * @root: Top of the subtree to destroy; if NULL, nothing is done.
+ * @ops: If not NULL, ops->free_object() is called on each leaf found.  If
+ *       NULL, the leaves themselves are left alone.
+ *
+ * Every node and shortcut in the subtree is kfree()'d.  The walk finishes
+ * when a NULL back pointer is reached on the way back up.
+ */
+static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
+                                       const struct assoc_array_ops *ops)
+{
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *cursor, *parent = NULL;
+       int slot = -1;  /* -1 means there is no parent slot to cross-check */
+
+       pr_devel("-->%s()\n", __func__);
+
+       cursor = root;
+       if (!cursor) {
+               pr_devel("empty\n");
+               return;
+       }
+
+move_to_meta:
+       if (assoc_array_ptr_is_shortcut(cursor)) {
+               /* Descend through a shortcut */
+               pr_devel("[%d] shortcut\n", slot);
+               BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
+               shortcut = assoc_array_ptr_to_shortcut(cursor);
+               /* Sanity check the links we arrived by */
+               BUG_ON(shortcut->back_pointer != parent);
+               BUG_ON(slot != -1 && shortcut->parent_slot != slot);
+               parent = cursor;
+               cursor = shortcut->next_node;
+               slot = -1;
+               BUG_ON(!assoc_array_ptr_is_node(cursor));
+       }
+
+       pr_devel("[%d] node\n", slot);
+       node = assoc_array_ptr_to_node(cursor);
+       BUG_ON(node->back_pointer != parent);
+       BUG_ON(slot != -1 && node->parent_slot != slot);
+       slot = 0;
+
+continue_node:
+       pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               struct assoc_array_ptr *ptr = node->slots[slot];
+               if (!ptr)
+                       continue;
+               if (assoc_array_ptr_is_meta(ptr)) {
+                       /* Dispose of the whole branch below before carrying
+                        * on with the remaining slots of this node.
+                        */
+                       parent = cursor;
+                       cursor = ptr;
+                       goto move_to_meta;
+               }
+
+               if (ops) {
+                       pr_devel("[%d] free leaf\n", slot);
+                       ops->free_object(assoc_array_ptr_to_leaf(ptr));
+               }
+       }
+
+       /* Note the way back up before the node is freed */
+       parent = node->back_pointer;
+       slot = node->parent_slot;
+       pr_devel("free node\n");
+       kfree(node);
+       if (!parent)
+               return; /* Done */
+
+       /* Move back up to the parent (may need to free a shortcut on
+        * the way up) */
+       if (assoc_array_ptr_is_shortcut(parent)) {
+               shortcut = assoc_array_ptr_to_shortcut(parent);
+               /* cursor still names the node just freed; only its value is
+                * compared here, it is not dereferenced.
+                */
+               BUG_ON(shortcut->next_node != cursor);
+               cursor = parent;
+               parent = shortcut->back_pointer;
+               slot = shortcut->parent_slot;
+               pr_devel("free shortcut\n");
+               kfree(shortcut);
+               if (!parent)
+                       return;
+
+               BUG_ON(!assoc_array_ptr_is_node(parent));
+       }
+
+       /* Ascend to next slot in parent node */
+       pr_devel("ascend to %p[%d]\n", parent, slot);
+       cursor = parent;
+       node = assoc_array_ptr_to_node(cursor);
+       slot++;
+       goto continue_node;
+}
+
+/**
+ * assoc_array_destroy - Destroy an associative array
+ * @array: The array to destroy.
+ * @ops: The operations to use.
+ *
+ * Release all the metadata in an associative array and free all the objects
+ * it holds.  On return the array is empty and may be used again.  This
+ * function cannot fail.
+ *
+ * No attempt is made to adjust pointers gracefully so that accesses made
+ * under the RCU read lock can continue, so the caller must prevent all other
+ * accesses whilst this takes place.  On the other hand, no memory allocation
+ * is required.
+ */
+void assoc_array_destroy(struct assoc_array *array,
+                        const struct assoc_array_ops *ops)
+{
+       struct assoc_array_ptr *doomed = array->root;
+
+       assoc_array_destroy_subtree(doomed, ops);
+       array->root = NULL;
+}
+
+/*
+ * Handle insertion into an empty tree.
+ *
+ * A single zeroed node is allocated to serve as the root.  The edit script is
+ * set up to place the new leaf in that node's first slot and to point the
+ * array's root at the node.  Returns false if the allocation fails and true
+ * otherwise.
+ */
+static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
+{
+       struct assoc_array_node *root_node;
+       struct assoc_array_ptr *root_ptr;
+
+       pr_devel("-->%s()\n", __func__);
+
+       root_node = kzalloc(sizeof(*root_node), GFP_KERNEL);
+       if (root_node == NULL)
+               return false;
+       root_ptr = assoc_array_node_to_ptr(root_node);
+
+       /* Record the node as new metadata belonging to this edit */
+       edit->new_meta[0] = root_ptr;
+       edit->leaf_p = &root_node->slots[0];
+       edit->adjust_count_on = root_node;
+
+       /* Have the array's root switched over to the new node */
+       edit->set[0].ptr = &edit->array->root;
+       edit->set[0].to = root_ptr;
+
+       pr_devel("<--%s() = ok [no root]\n", __func__);
+       return true;
+}
+
+/*
+ * Handle insertion into a terminal node.
+ *
+ * @edit: The edit script being built.  It is filled in with the slot that is
+ *        to receive the new leaf plus any new and replaced metadata blocks.
+ * @ops: The operations used to compare objects and extract key chunks.
+ * @index_key: The index key of the leaf being inserted.
+ * @result: The walk result; only the terminal_node part is consulted.
+ *
+ * Returns true if the edit script was prepared and false if a memory
+ * allocation failed.  Blocks allocated before a failure remain recorded in
+ * edit->new_meta[] (presumably for the caller to release - the cleanup path
+ * is not visible from here).
+ */
+static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
+                                                 const struct assoc_array_ops *ops,
+                                                 const void *index_key,
+                                                 struct assoc_array_walk_result *result)
+{
+       struct assoc_array_shortcut *shortcut, *new_s0;
+       struct assoc_array_node *node, *new_n0, *new_n1, *side;
+       struct assoc_array_ptr *ptr;
+       unsigned long dissimilarity, base_seg, blank;
+       size_t keylen;
+       bool have_meta;
+       int level, diff;
+       int slot, next_slot, free_slot, i, j;
+
+       node    = result->terminal_node.node;
+       level   = result->terminal_node.level;
+       /* The extra entry at the end of the cache describes the new leaf */
+       edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* We arrived at a node which doesn't have an onward node or shortcut
+        * pointer that we have to follow.  This means that (a) the leaf we
+        * want must go here (either by insertion or replacement) or (b) we
+        * need to split this node and insert in one of the fragments.
+        */
+       free_slot = -1;
+
+       /* Firstly, we have to check the leaves in this node to see if there's
+        * a matching one we should replace in place.
+        *
+        * Other slots of this node may hold node or shortcut pointers; those
+        * must not be handed to compare_object() as though they were leaves.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               ptr = node->slots[i];
+               if (!ptr) {
+                       free_slot = i;
+                       continue;
+               }
+               if (assoc_array_ptr_is_leaf(ptr) &&
+                   ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+                                       index_key)) {
+                       pr_devel("replace in slot %d\n", i);
+                       edit->leaf_p = &node->slots[i];
+                       edit->dead_leaf = node->slots[i];
+                       pr_devel("<--%s() = ok [replace]\n", __func__);
+                       return true;
+               }
+       }
+
+       /* If there is a free slot in this node then we can just insert the
+        * leaf here.
+        */
+       if (free_slot >= 0) {
+               pr_devel("insert in free slot %d\n", free_slot);
+               edit->leaf_p = &node->slots[free_slot];
+               edit->adjust_count_on = node;
+               pr_devel("<--%s() = ok [insert]\n", __func__);
+               return true;
+       }
+
+       /* The node has no spare slots - so we're either going to have to split
+        * it or insert another node before it.
+        *
+        * Whatever, we're going to need at least two new nodes - so allocate
+        * those now.  We may also need a new shortcut, but we deal with that
+        * when we need it.
+        */
+       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n0)
+               return false;
+       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+       new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n1)
+               return false;
+       edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
+
+       /* We need to find out how similar the leaves are. */
+       pr_devel("no spare slots\n");
+       have_meta = false;
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               ptr = node->slots[i];
+               if (assoc_array_ptr_is_meta(ptr)) {
+                       /* 0xff marks a slot that holds metadata, not a leaf */
+                       edit->segment_cache[i] = 0xff;
+                       have_meta = true;
+                       continue;
+               }
+               base_seg = ops->get_object_key_chunk(
+                       assoc_array_ptr_to_leaf(ptr), level);
+               base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+               edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+       }
+
+       if (have_meta) {
+               pr_devel("have meta\n");
+               goto split_node;
+       }
+
+       /* The node contains only leaves */
+       dissimilarity = 0;
+       base_seg = edit->segment_cache[0];
+       for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
+               dissimilarity |= edit->segment_cache[i] ^ base_seg;
+
+       pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
+
+       if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
+               /* The old leaves all cluster in the same slot.  We will need
+                * to insert a shortcut if the new node wants to cluster with them.
+                */
+               if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
+                       goto all_leaves_cluster_together;
+
+               /* Otherwise all the old leaves cluster in the same slot, but
+                * the new leaf wants to go into a different slot - so we
+                * create a new node (n0) to hold the new leaf and a pointer to
+                * a new node (n1) holding all the old leaves.
+                *
+                * This is done by falling through to the node splitting path,
+                * which links n0 to n1 and copes with the node being the root
+                * or hanging off a shortcut.
+                */
+               pr_devel("present leaves cluster but not new leaf\n");
+       }
+
+split_node:
+       pr_devel("split node\n");
+
+       /* We need to split the current node.  The node may contain anything
+        * from a single leaf (in which case that leaf clusters with the new
+        * leaf) with the rest being meta pointers, to all leaves, some of
+        * which may cluster.
+        *
+        * It won't be the case in which all the current leaves plus the new
+        * leaf want to cluster in the same slot.
+        *
+        * We need to expel at least two leaves out of a set consisting of the
+        * leaves in the node and the new leaf.  The current meta pointers can
+        * just be copied as they don't cluster with any of the leaves.
+        *
+        * We need a new node (n0) to replace the current one and a new node to
+        * take the expelled nodes (n1).
+        */
+       edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+       new_n0->back_pointer = node->back_pointer;
+       new_n0->parent_slot = node->parent_slot;
+       new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+       new_n1->parent_slot = -1; /* Need to calculate this */
+
+do_split_node:
+       pr_devel("do_split_node\n");
+
+       new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+       new_n1->nr_leaves_on_branch = 0;
+
+       /* Begin by finding two matching leaves.  There have to be at least two
+        * that match - even if there are meta pointers - because any leaf that
+        * would match a slot with a meta pointer in it must be somewhere
+        * behind that meta pointer and cannot be here.  Further, given N
+        * remaining leaf slots, we now have N+1 leaves to go in them.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               slot = edit->segment_cache[i];
+               if (slot != 0xff)
+                       for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
+                               if (edit->segment_cache[j] == slot)
+                                       goto found_slot_for_multiple_occupancy;
+       }
+found_slot_for_multiple_occupancy:
+       pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
+       BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
+       BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
+       BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
+
+       new_n1->parent_slot = slot;
+
+       /* Metadata pointers cannot change slot */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+               if (assoc_array_ptr_is_meta(node->slots[i]))
+                       new_n0->slots[i] = node->slots[i];
+               else
+                       new_n0->slots[i] = NULL;
+       BUG_ON(new_n0->slots[slot] != NULL);
+       new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
+
+       /* Filter the leaf pointers between the new nodes */
+       free_slot = -1;
+       next_slot = 0;
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               if (assoc_array_ptr_is_meta(node->slots[i]))
+                       continue;
+               if (edit->segment_cache[i] == slot) {
+                       new_n1->slots[next_slot++] = node->slots[i];
+                       new_n1->nr_leaves_on_branch++;
+               } else {
+                       /* Leaves staying in n0 take whichever slots are not
+                        * occupied by metadata pointers.
+                        */
+                       do {
+                               free_slot++;
+                       } while (new_n0->slots[free_slot] != NULL);
+                       new_n0->slots[free_slot] = node->slots[i];
+               }
+       }
+
+       pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
+
+       /* Now find a home for the new leaf in whichever node it belongs to */
+       if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
+               do {
+                       free_slot++;
+               } while (new_n0->slots[free_slot] != NULL);
+               edit->leaf_p = &new_n0->slots[free_slot];
+               edit->adjust_count_on = new_n0;
+       } else {
+               edit->leaf_p = &new_n1->slots[next_slot++];
+               edit->adjust_count_on = new_n1;
+       }
+
+       BUG_ON(next_slot <= 1);
+
+       /* The metadata blocks carried over to n0 need their back pointers
+        * redirecting from the old node to n0.
+        */
+       edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               if (edit->segment_cache[i] == 0xff) {
+                       ptr = node->slots[i];
+                       BUG_ON(assoc_array_ptr_is_leaf(ptr));
+                       if (assoc_array_ptr_is_node(ptr)) {
+                               side = assoc_array_ptr_to_node(ptr);
+                               edit->set_backpointers[i] = &side->back_pointer;
+                       } else {
+                               shortcut = assoc_array_ptr_to_shortcut(ptr);
+                               edit->set_backpointers[i] = &shortcut->back_pointer;
+                       }
+               }
+       }
+
+       /* Work out which pointer currently leads to the old node: the array
+        * root, a slot in a parent node or the onward link of a shortcut.
+        */
+       ptr = node->back_pointer;
+       if (!ptr)
+               edit->set[0].ptr = &edit->array->root;
+       else if (assoc_array_ptr_is_node(ptr))
+               edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
+       else
+               edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
+       edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+       pr_devel("<--%s() = ok [split node]\n", __func__);
+       return true;
+
+all_leaves_cluster_together:
+       /* All the leaves, new and old, want to cluster together in this node
+        * in the same slot, so we have to replace this node with a shortcut to
+        * skip over the identical parts of the key and then place a pair of
+        * nodes, one inside the other, at the end of the shortcut and
+        * distribute the keys between them.
+        *
+        * Firstly we need to work out where the leaves start diverging as a
+        * bit position into their keys so that we know how big the shortcut
+        * needs to be.
+        *
+        * We only need to make a single pass of N of the N+1 leaves because if
+        * any keys differ between themselves at bit X then at least one of
+        * them must also differ with the base key at bit X or before.
+        */
+       pr_devel("all leaves cluster together\n");
+       diff = INT_MAX;
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
+                                         assoc_array_ptr_to_leaf(node->slots[i]));
+               if (x < diff) {
+                       BUG_ON(x < 0);
+                       diff = x;
+               }
+       }
+       BUG_ON(diff == INT_MAX);
+       BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
+
+       /* Number of whole key words the shortcut has to store */
+       keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+       keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+       new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+                        keylen * sizeof(unsigned long), GFP_KERNEL);
+       if (!new_s0)
+               return false;
+       edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
+
+       edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+       new_s0->back_pointer = node->back_pointer;
+       new_s0->parent_slot = node->parent_slot;
+       new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+       new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+       new_n0->parent_slot = 0;
+       new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+       new_n1->parent_slot = -1; /* Need to calculate this */
+
+       new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+       pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
+       BUG_ON(level <= 0);
+
+       for (i = 0; i < keylen; i++)
+               new_s0->index_key[i] =
+                       ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
+
+       /* Clear the part of the last key word that lies beyond the level the
+        * shortcut skips to.
+        */
+       blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+       pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
+       new_s0->index_key[keylen - 1] &= ~blank;
+
+       /* This now reduces to a node splitting exercise for which we'll need
+        * to regenerate the disparity table.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               ptr = node->slots[i];
+               base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
+                                                    level);
+               base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+               edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+       }
+
+       base_seg = ops->get_key_chunk(index_key, level);
+       base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+       edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
+       goto do_split_node;
+}
+
+/*
+ * Handle insertion into the middle of a shortcut.
+ *
+ * The walk stopped at a shortcut whose stored key bits differ from the key
+ * being inserted (the details are passed in result->wrong_shortcut).  The old
+ * shortcut is marked for excision and is replaced by a new node placed at the
+ * level of the first difference.  That node is preceded by a new, shorter
+ * shortcut if there are levels to skip before it, and is followed by another
+ * new shortcut if there are still levels to skip after it.
+ *
+ * Every block allocated here is recorded in edit->new_meta[].  Returns true
+ * on success or false if an allocation failed, in which case whatever has
+ * already been recorded is left in the edit script for the caller to discard.
+ *
+ * @ops is not used by this function.
+ */
+static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
+                                           const struct assoc_array_ops *ops,
+                                           struct assoc_array_walk_result *result)
+{
+       struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
+       struct assoc_array_node *node, *new_n0, *side;
+       unsigned long sc_segments, dissimilarity, blank;
+       size_t keylen;
+       int level, sc_level, diff;
+       int sc_slot;
+
+       shortcut        = result->wrong_shortcut.shortcut;
+       level           = result->wrong_shortcut.level;
+       sc_level        = result->wrong_shortcut.sc_level;
+       sc_segments     = result->wrong_shortcut.sc_segments;
+       dissimilarity   = result->wrong_shortcut.dissimilarity;
+
+       pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
+                __func__, level, dissimilarity, sc_level);
+
+       /* We need to split a shortcut and insert a node between the two
+        * pieces.  Zero-length pieces will be dispensed with entirely.
+        *
+        * First of all, we need to find out in which level the first
+        * difference was.
+        */
+       diff = __ffs(dissimilarity);
+       diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+       diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
+       pr_devel("diff=%d\n", diff);
+
+       /* Work out which pointer currently leads to the old shortcut; that is
+        * the pointer the edit script will have to redirect.
+        */
+       if (!shortcut->back_pointer) {
+               edit->set[0].ptr = &edit->array->root;
+       } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
+               node = assoc_array_ptr_to_node(shortcut->back_pointer);
+               edit->set[0].ptr = &node->slots[shortcut->parent_slot];
+       } else {
+               BUG();
+       }
+
+       edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
+
+       /* Create a new node now since we're going to need it anyway */
+       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n0)
+               return false;
+       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+       edit->adjust_count_on = new_n0;
+
+       /* Insert a new shortcut before the new node if this segment isn't of
+        * zero length - otherwise we just connect the new node directly to the
+        * parent.
+        */
+       level += ASSOC_ARRAY_LEVEL_STEP;
+       if (diff > level) {
+               pr_devel("pre-shortcut %d...%d\n", level, diff);
+               keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+               new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+                                keylen * sizeof(unsigned long), GFP_KERNEL);
+               if (!new_s0)
+                       return false;
+               edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
+               edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+               new_s0->back_pointer = shortcut->back_pointer;
+               new_s0->parent_slot = shortcut->parent_slot;
+               new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+               new_s0->skip_to_level = diff;
+
+               new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+               new_n0->parent_slot = 0;
+
+               memcpy(new_s0->index_key, shortcut->index_key,
+                      keylen * sizeof(unsigned long));
+
+               /* Blank off any bits in the last word that lie beyond the end
+                * of the new shortcut.  This must be skipped when diff falls
+                * on a chunk boundary: the shift count would then be zero,
+                * the mask all-ones and the whole of the last word - every
+                * bit of which is in use - would be wiped.
+                */
+               if (diff & ASSOC_ARRAY_KEY_CHUNK_MASK) {
+                       blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+                       pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
+                       new_s0->index_key[keylen - 1] &= ~blank;
+               }
+       } else {
+               pr_devel("no pre-shortcut\n");
+               edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+               new_n0->back_pointer = shortcut->back_pointer;
+               new_n0->parent_slot = shortcut->parent_slot;
+       }
+
+       /* The new node starts out with the leaf count of the node that the
+        * old shortcut led to, as that subtree will hang off it.
+        */
+       side = assoc_array_ptr_to_node(shortcut->next_node);
+       new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
+
+       /* We need to know which slot in the new node is going to take a
+        * metadata pointer.
+        */
+       sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+       sc_slot &= ASSOC_ARRAY_FAN_MASK;
+
+       pr_devel("new slot %lx >> %d -> %d\n",
+                sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
+
+       /* Determine whether we need to follow the new node with a replacement
+        * for the current shortcut.  We could in theory reuse the current
+        * shortcut if its parent slot number doesn't change - but that's a
+        * 1-in-16 chance so not worth expending the code upon.
+        */
+       level = diff + ASSOC_ARRAY_LEVEL_STEP;
+       if (level < shortcut->skip_to_level) {
+               pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
+               keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+               new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
+                                keylen * sizeof(unsigned long), GFP_KERNEL);
+               if (!new_s1)
+                       return false;
+               edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
+
+               new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
+               new_s1->parent_slot = sc_slot;
+               new_s1->next_node = shortcut->next_node;
+               new_s1->skip_to_level = shortcut->skip_to_level;
+
+               new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
+
+               /* The replacement ends at the same level as the old shortcut,
+                * so the old key words can be copied over wholesale.
+                */
+               memcpy(new_s1->index_key, shortcut->index_key,
+                      keylen * sizeof(unsigned long));
+
+               edit->set[1].ptr = &side->back_pointer;
+               edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
+       } else {
+               pr_devel("no post-shortcut\n");
+
+               /* We don't have to replace the pointed-to node as long as we
+                * use memory barriers to make sure the parent slot number is
+                * changed before the back pointer (the parent slot number is
+                * irrelevant to the old parent shortcut).
+                */
+               new_n0->slots[sc_slot] = shortcut->next_node;
+               edit->set_parent_slot[0].p = &side->parent_slot;
+               edit->set_parent_slot[0].to = sc_slot;
+               edit->set[1].ptr = &side->back_pointer;
+               edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+       }
+
+       /* Install the new leaf in a spare slot in the new node. */
+       if (sc_slot == 0)
+               edit->leaf_p = &new_n0->slots[1];
+       else
+               edit->leaf_p = &new_n0->slots[0];
+
+       pr_devel("<--%s() = ok [split shortcut]\n", __func__);
+       return true;
+}
+
+/**
+ * assoc_array_insert - Script insertion of an object into an associative array
+ * @array: The array to insert into.
+ * @ops: The operations to use.
+ * @index_key: The key to insert at.
+ * @object: The object to insert.
+ *
+ * Work out in advance, and preallocate the memory for, a script that inserts
+ * an object into an associative array or replaces one already there.  The
+ * resulting edit script may subsequently be either applied or cancelled.
+ *
+ * Returns a pointer to the edit script, or -ENOMEM as an error pointer.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * The tree may be accessed concurrently with this function by anyone holding
+ * the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+                                           const struct assoc_array_ops *ops,
+                                           const void *index_key,
+                                           void *object)
+{
+       struct assoc_array_walk_result result;
+       struct assoc_array_edit *edit;
+       bool scripted = false;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* The bottom bit of a leaf pointer is used for type-marking, so the
+        * object handed to us must not have it set.  A NULL object would
+        * normally mean an empty slot, but it is tolerated here because the
+        * object can be changed before the script is applied.
+        */
+       BUG_ON(assoc_array_ptr_is_meta(object));
+
+       edit = kzalloc(sizeof(*edit), GFP_KERNEL);
+       if (!edit)
+               return ERR_PTR(-ENOMEM);
+       edit->array = array;
+       edit->ops = ops;
+       edit->leaf = assoc_array_leaf_to_ptr(object);
+       edit->adjust_count_by = 1;
+
+       switch (assoc_array_walk(array, ops, index_key, &result)) {
+       case assoc_array_walk_tree_empty:
+               /* There's no root node yet, so one must be allocated */
+               scripted = assoc_array_insert_in_empty_tree(edit);
+               break;
+
+       case assoc_array_walk_found_terminal_node:
+               /* The walk ended on a node whose slot for this index key
+                * holds no node/shortcut pointer to follow any further.
+                */
+               scripted = assoc_array_insert_into_terminal_node(edit, ops,
+                                                                index_key,
+                                                                &result);
+               break;
+
+       case assoc_array_walk_found_wrong_shortcut:
+               /* The slot we had to follow holds a shortcut that doesn't
+                * match our key.
+                */
+               scripted = assoc_array_insert_mid_shortcut(edit, ops, &result);
+               break;
+       }
+
+       if (scripted)
+               return edit;
+
+       /* Clean up after an out of memory error */
+       pr_devel("enomem\n");
+       assoc_array_cancel_edit(edit);
+       return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_insert_set_object - Set the new object pointer in an edit script
+ * @edit: The edit script to modify.
+ * @object: The object pointer to set.
+ *
+ * Swap the object that an insertion script is going to insert for a different
+ * one.  Whatever the script pointed to before is not freed.  This has to be
+ * done before the script is applied, and @object may not be NULL.
+ */
+void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
+{
+       BUG_ON(object == NULL);
+       edit->leaf = assoc_array_leaf_to_ptr(object);
+}
+
+/*
+ * State handed to assoc_array_delete_collapse_iterator() whilst the leaves of
+ * a subtree are being gathered into a single replacement node.
+ */
+struct assoc_array_delete_collapse_context {
+       struct assoc_array_node *node;          /* Node the leaves are gathered into */
+       const void              *skip_leaf;     /* Leaf to leave out (the one being deleted) */
+       int                     slot;           /* Next slot in node to be filled */
+};
+
+/*
+ * Subtree collapse to node iterator.
+ *
+ * Called once per leaf; each leaf other than the one being skipped is stored
+ * in the next unused slot of the node being collapsed into.  Always returns 0.
+ */
+static int assoc_array_delete_collapse_iterator(const void *leaf,
+                                               void *iterator_data)
+{
+       struct assoc_array_delete_collapse_context *ctx = iterator_data;
+
+       if (leaf != ctx->skip_leaf) {
+               BUG_ON(ctx->slot >= ASSOC_ARRAY_FAN_OUT);
+
+               ctx->node->slots[ctx->slot] = assoc_array_leaf_to_ptr(leaf);
+               ctx->slot++;
+       }
+
+       return 0;
+}
+
+/**
+ * assoc_array_delete - Script deletion of an object from an associative array
+ * @array: The array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Precalculate and preallocate a script for the deletion of an object from an
+ * associative array.  This results in an edit script that can either be
+ * applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if the object was found,
+ * NULL if the object was not found or -ENOMEM.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+                                           const struct assoc_array_ops *ops,
+                                           const void *index_key)
+{
+       struct assoc_array_delete_collapse_context collapse;
+       struct assoc_array_walk_result result;
+       struct assoc_array_node *node, *new_n0;
+       struct assoc_array_edit *edit;
+       struct assoc_array_ptr *ptr;
+       bool has_meta;
+       int slot, i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return ERR_PTR(-ENOMEM);
+       edit->array = array;
+       edit->ops = ops;
+       edit->adjust_count_by = -1;
+
+       switch (assoc_array_walk(array, ops, index_key, &result)) {
+       case assoc_array_walk_found_terminal_node:
+               /* We found a node that should contain the leaf we've been
+                * asked to remove - *if* it's in the tree.
+                */
+               pr_devel("terminal_node\n");
+               node = result.terminal_node.node;
+
+               /* Only leaf pointers are handed to compare_object(); empty
+                * slots and metadata pointers are passed over.
+                */
+               for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+                       ptr = node->slots[slot];
+                       if (ptr &&
+                           assoc_array_ptr_is_leaf(ptr) &&
+                           ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+                                               index_key))
+                               goto found_leaf;
+               }
+               /* No matching leaf in the node: fall through to "not found" */
+       case assoc_array_walk_tree_empty:
+       case assoc_array_walk_found_wrong_shortcut:
+       default:
+               assoc_array_cancel_edit(edit);
+               pr_devel("not found\n");
+               return NULL;
+       }
+
+found_leaf:
+       BUG_ON(array->nr_leaves_on_tree <= 0);
+
+       /* In the simplest form of deletion we just clear the slot and release
+        * the leaf after a suitable interval.
+        */
+       edit->dead_leaf = node->slots[slot];
+       edit->set[0].ptr = &node->slots[slot];
+       edit->set[0].to = NULL;
+       edit->adjust_count_on = node;
+
+       /* If that concludes erasure of the last leaf, then delete the entire
+        * internal array.
+        */
+       if (array->nr_leaves_on_tree == 1) {
+               edit->set[1].ptr = &array->root;
+               edit->set[1].to = NULL;
+               edit->adjust_count_on = NULL;
+               edit->excised_subtree = array->root;
+               pr_devel("all gone\n");
+               return edit;
+       }
+
+       /* However, we'd also like to clear up some metadata blocks if we
+        * possibly can.
+        *
+        * We go for a simple algorithm of: if this node has FAN_OUT or fewer
+        * leaves in it, then attempt to collapse it - and attempt to
+        * recursively collapse up the tree.
+        *
+        * We could also try and collapse in partially filled subtrees to take
+        * up space in this node.
+        *
+        * Note that nr_leaves_on_branch still counts the leaf being deleted at
+        * this point, hence the comparisons against FAN_OUT + 1.
+        */
+       if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+               struct assoc_array_node *parent, *grandparent;
+               /* NOTE: this shadows the function-scope ptr declared above */
+               struct assoc_array_ptr *ptr;
+
+               /* First of all, we need to know if this node has metadata so
+                * that we don't try collapsing if all the leaves are already
+                * here.
+                */
+               has_meta = false;
+               for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+                       ptr = node->slots[i];
+                       if (assoc_array_ptr_is_meta(ptr)) {
+                               has_meta = true;
+                               break;
+                       }
+               }
+
+               pr_devel("leaves: %ld [m=%d]\n",
+                        node->nr_leaves_on_branch - 1, has_meta);
+
+               /* Look further up the tree to see if we can collapse this node
+                * into a more proximal node too.
+                */
+               parent = node;
+       collapse_up:
+               pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
+
+               /* Find the next node up, stepping over an intervening shortcut
+                * if there is one.  A NULL back pointer means there is nothing
+                * further up to consider.
+                */
+               ptr = parent->back_pointer;
+               if (!ptr)
+                       goto do_collapse;
+               if (assoc_array_ptr_is_shortcut(ptr)) {
+                       struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
+                       ptr = s->back_pointer;
+                       if (!ptr)
+                               goto do_collapse;
+               }
+
+               /* Keep climbing whilst the ancestor's whole branch would still
+                * fit into a single node once the leaf has gone.
+                */
+               grandparent = assoc_array_ptr_to_node(ptr);
+               if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+                       parent = grandparent;
+                       goto collapse_up;
+               }
+
+       do_collapse:
+               /* There's no point collapsing if the original node has no meta
+                * pointers to discard and if we didn't merge into one of that
+                * node's ancestry.
+                */
+               if (has_meta || parent != node) {
+                       node = parent;
+
+                       /* Create a new node to collapse into */
+                       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+                       if (!new_n0)
+                               goto enomem;
+                       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+
+                       /* The new node takes over the position and leaf count
+                        * of the node heading the subtree being collapsed.
+                        */
+                       new_n0->back_pointer = node->back_pointer;
+                       new_n0->parent_slot = node->parent_slot;
+                       new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+                       edit->adjust_count_on = new_n0;
+
+                       /* Gather every leaf in the subtree, bar the dead one,
+                        * into consecutive slots of the new node.
+                        */
+                       collapse.node = new_n0;
+                       collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
+                       collapse.slot = 0;
+                       assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
+                                                   node->back_pointer,
+                                                   assoc_array_delete_collapse_iterator,
+                                                   &collapse);
+                       pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
+                       BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
+
+                       /* Locate the pointer that currently leads to the old
+                        * subtree so that it can be switched to the new node.
+                        */
+                       if (!node->back_pointer) {
+                               edit->set[1].ptr = &array->root;
+                       } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
+                               BUG();
+                       } else if (assoc_array_ptr_is_node(node->back_pointer)) {
+                               struct assoc_array_node *p =
+                                       assoc_array_ptr_to_node(node->back_pointer);
+                               edit->set[1].ptr = &p->slots[node->parent_slot];
+                       } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
+                               struct assoc_array_shortcut *s =
+                                       assoc_array_ptr_to_shortcut(node->back_pointer);
+                               edit->set[1].ptr = &s->next_node;
+                       }
+                       edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+                       edit->excised_subtree = assoc_array_node_to_ptr(node);
+               }
+       }
+
+       return edit;
+
+enomem:
+       /* Clean up after an out of memory error */
+       pr_devel("enomem\n");
+       assoc_array_cancel_edit(edit);
+       return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_clear - Script deletion of all objects from an associative array
+ * @array: The array to clear.
+ * @ops: The operations to use.
+ *
+ * Work out in advance, and preallocate the memory for, a script that removes
+ * every object from an associative array.  The resulting edit script may
+ * subsequently be either applied or cancelled.
+ *
+ * Returns a pointer to the edit script if the array has a root, NULL if the
+ * array is empty, or -ENOMEM as an error pointer.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * The tree may be accessed concurrently with this function by anyone holding
+ * the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+                                          const struct assoc_array_ops *ops)
+{
+       struct assoc_array_edit *edit = NULL;
+
+       pr_devel("-->%s()\n", __func__);
+
+       if (array->root) {
+               edit = kzalloc(sizeof(*edit), GFP_KERNEL);
+               if (!edit)
+                       return ERR_PTR(-ENOMEM);
+
+               /* Detach the whole tree from the root and hand it over for
+                * destruction.
+                */
+               edit->array = array;
+               edit->ops = ops;
+               edit->ops_for_excised_subtree = ops;
+               edit->excised_subtree = array->root;
+               edit->set[1].ptr = &array->root;
+               edit->set[1].to = NULL;
+               pr_devel("all gone\n");
+       }
+
+       return edit;
+}
+
+/*
+ * Handle the deferred destruction after an applied edit.
+ *
+ * Releases the dead leaf (if any) through the free_object op, frees the
+ * excised metadata blocks, destroys the excised subtree (if any) and finally
+ * frees the edit script itself.
+ */
+static void assoc_array_rcu_cleanup(struct rcu_head *head)
+{
+       struct assoc_array_edit *edit;
+       struct assoc_array_ptr *subtree;
+       int i;
+
+       edit = container_of(head, struct assoc_array_edit, rcu);
+
+       pr_devel("-->%s()\n", __func__);
+
+       if (edit->dead_leaf)
+               edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
+
+       for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) {
+               if (!edit->excised_meta[i])
+                       continue;
+               kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
+       }
+
+       subtree = edit->excised_subtree;
+       if (subtree) {
+               BUG_ON(assoc_array_ptr_is_leaf(subtree));
+
+               /* Clear the back pointer of the subtree's topmost block
+                * before handing the subtree over for destruction.
+                */
+               if (assoc_array_ptr_is_node(subtree))
+                       assoc_array_ptr_to_node(subtree)->back_pointer = NULL;
+               else
+                       assoc_array_ptr_to_shortcut(subtree)->back_pointer = NULL;
+
+               assoc_array_destroy_subtree(subtree,
+                                           edit->ops_for_excised_subtree);
+       }
+
+       kfree(edit);
+}
+
+/**
+ * assoc_array_apply_edit - Apply an edit script to an associative array
+ * @edit: The script to apply.
+ *
+ * Apply an edit script to an associative array to effect an insertion,
+ * deletion or clearance.  As the edit script includes preallocated memory,
+ * this is guaranteed not to fail.
+ *
+ * The edit script, dead objects and dead metadata will be scheduled for
+ * destruction after an RCU grace period to permit those doing read-only
+ * accesses on the array to continue to do so under the RCU read lock whilst
+ * the edit is taking place.
+ */
+void assoc_array_apply_edit(struct assoc_array_edit *edit)
+{
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *ptr;
+       int i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* The script is applied in stages with a write barrier in front of
+        * each one, so the order of the stages below must not be changed.
+        *
+        * Stage 1: store the new leaf in the slot reserved for it, if any.
+        */
+       smp_wmb();
+       if (edit->leaf_p)
+               *edit->leaf_p = edit->leaf;
+
+       /* Stage 2: update any parent slot numbers */
+       smp_wmb();
+       for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
+               if (edit->set_parent_slot[i].p)
+                       *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
+
+       /* Stage 3: point the listed back pointers at their common target */
+       smp_wmb();
+       for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
+               if (edit->set_backpointers[i])
+                       *edit->set_backpointers[i] = edit->set_backpointers_to;
+
+       /* Stage 4: make the scripted pointer replacements */
+       smp_wmb();
+       for (i = 0; i < ARRAY_SIZE(edit->set); i++)
+               if (edit->set[i].ptr)
+                       *edit->set[i].ptr = edit->set[i].to;
+
+       /* Fix up the leaf counts.  If the array no longer has a root the
+        * total is simply zeroed; otherwise the adjustment is applied to the
+        * nominated node and to every node above it, stepping over shortcuts,
+        * until a NULL back pointer is reached, and then to the array's total.
+        */
+       if (edit->array->root == NULL) {
+               edit->array->nr_leaves_on_tree = 0;
+       } else if (edit->adjust_count_on) {
+               node = edit->adjust_count_on;
+               for (;;) {
+                       node->nr_leaves_on_branch += edit->adjust_count_by;
+
+                       ptr = node->back_pointer;
+                       if (!ptr)
+                               break;
+                       if (assoc_array_ptr_is_shortcut(ptr)) {
+                               shortcut = assoc_array_ptr_to_shortcut(ptr);
+                               ptr = shortcut->back_pointer;
+                               if (!ptr)
+                                       break;
+                       }
+                       BUG_ON(!assoc_array_ptr_is_node(ptr));
+                       node = assoc_array_ptr_to_node(ptr);
+               }
+
+               edit->array->nr_leaves_on_tree += edit->adjust_count_by;
+       }
+
+       /* Defer freeing of the script and of everything it has displaced
+        * until after an RCU grace period.
+        */
+       call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
+}
+
+/**
+ * assoc_array_cancel_edit - Discard an edit script.
+ * @edit: The script to discard.
+ *
+ * Release an edit script together with every metadata block that was
+ * preallocated for it.  The associative array that the script was meant for
+ * is not touched.
+ *
+ * NOTE!  In the case of an insertion script, this does _not_ release the leaf
+ * that was to be inserted.  That is left to the caller.
+ */
+void assoc_array_cancel_edit(struct assoc_array_edit *edit)
+{
+       struct assoc_array_ptr *meta;
+       int i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* Free whichever of the preallocated nodes and shortcuts exist */
+       for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
+               meta = edit->new_meta[i];
+               if (!meta)
+                       continue;
+
+               if (assoc_array_ptr_is_node(meta))
+                       kfree(assoc_array_ptr_to_node(meta));
+               else
+                       kfree(assoc_array_ptr_to_shortcut(meta));
+       }
+
+       kfree(edit);
+}
+
+/**
+ * assoc_array_gc - Garbage collect an associative array.
+ * @array: The array to clean.
+ * @ops: The operations to use.
+ * @iterator: A callback function to pass judgement on each object.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Collect garbage from an associative array and pack down the internal tree to
+ * save memory.
+ *
+ * The iterator function is asked to pass judgement upon each object in the
+ * array.  If it returns false, the object is discarded and if it returns
+ * true, the object is kept.  If it returns true, it must increment the
+ * object's usage count (or whatever it needs to do to retain it) before
+ * returning.
+ *
+ * This function returns 0 if successful or -ENOMEM if out of memory.  In the
+ * latter case, the array is not changed.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+int assoc_array_gc(struct assoc_array *array,
+                  const struct assoc_array_ops *ops,
+                  bool (*iterator)(void *object, void *iterator_data),
+                  void *iterator_data)
+{
+       struct assoc_array_shortcut *shortcut, *new_s;
+       struct assoc_array_node *node, *new_n;
+       struct assoc_array_edit *edit;
+       struct assoc_array_ptr *cursor, *ptr;
+       struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
+       unsigned long nr_leaves_on_tree;
+       int keylen, slot, nr_free, next_slot, i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       if (!array->root)
+               return 0;
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return -ENOMEM;
+       edit->array = array;
+       edit->ops = ops;
+       edit->ops_for_excised_subtree = ops;
+       edit->set[0].ptr = &array->root;
+       edit->excised_subtree = array->root;
+
+       new_root = new_parent = NULL;
+       new_ptr_pp = &new_root;
+       cursor = array->root;
+
+descend:
+       /* If this point is a shortcut, then we need to duplicate it and
+        * advance the target cursor.
+        */
+       if (assoc_array_ptr_is_shortcut(cursor)) {
+               shortcut = assoc_array_ptr_to_shortcut(cursor);
+               keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+               new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
+                               keylen * sizeof(unsigned long), GFP_KERNEL);
+               if (!new_s)
+                       goto enomem;
+               pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
+               memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
+                                        keylen * sizeof(unsigned long)));
+               new_s->back_pointer = new_parent;
+               new_s->parent_slot = shortcut->parent_slot;
+               *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
+               new_ptr_pp = &new_s->next_node;
+               cursor = shortcut->next_node;
+       }
+
+       /* Duplicate the node at this position */
+       node = assoc_array_ptr_to_node(cursor);
+       new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n)
+               goto enomem;
+       pr_devel("dup node %p -> %p\n", node, new_n);
+       new_n->back_pointer = new_parent;
+       new_n->parent_slot = node->parent_slot;
+       *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
+       new_ptr_pp = NULL;
+       slot = 0;
+
+continue_node:
+       /* Filter across any leaves and gc any subtrees */
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = node->slots[slot];
+               if (!ptr)
+                       continue;
+
+               if (assoc_array_ptr_is_leaf(ptr)) {
+                       if (iterator(assoc_array_ptr_to_leaf(ptr),
+                                    iterator_data))
+                               /* The iterator will have done any reference
+                                * counting on the object for us.
+                                */
+                               new_n->slots[slot] = ptr;
+                       continue;
+               }
+
+               new_ptr_pp = &new_n->slots[slot];
+               cursor = ptr;
+               goto descend;
+       }
+
+       pr_devel("-- compress node %p --\n", new_n);
+
+       /* Count up the number of empty slots in this node and work out the
+        * subtree leaf count.
+        */
+       new_n->nr_leaves_on_branch = 0;
+       nr_free = 0;
+       for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = new_n->slots[slot];
+               if (!ptr)
+                       nr_free++;
+               else if (assoc_array_ptr_is_leaf(ptr))
+                       new_n->nr_leaves_on_branch++;
+       }
+       pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
+
+       /* See what we can fold in */
+       next_slot = 0;
+       for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               struct assoc_array_shortcut *s;
+               struct assoc_array_node *child;
+
+               ptr = new_n->slots[slot];
+               if (!ptr || assoc_array_ptr_is_leaf(ptr))
+                       continue;
+
+               s = NULL;
+               if (assoc_array_ptr_is_shortcut(ptr)) {
+                       s = assoc_array_ptr_to_shortcut(ptr);
+                       ptr = s->next_node;
+               }
+
+               child = assoc_array_ptr_to_node(ptr);
+               new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
+
+               if (child->nr_leaves_on_branch <= nr_free + 1) {
+                       /* Fold the child node into this one */
+                       pr_devel("[%d] fold node %lu/%d [nx %d]\n",
+                                slot, child->nr_leaves_on_branch, nr_free + 1,
+                                next_slot);
+
+                       /* We would already have reaped an intervening shortcut
+                        * on the way back up the tree.
+                        */
+                       BUG_ON(s);
+
+                       new_n->slots[slot] = NULL;
+                       nr_free++;
+                       if (slot < next_slot)
+                               next_slot = slot;
+                       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+                               struct assoc_array_ptr *p = child->slots[i];
+                               if (!p)
+                                       continue;
+                               BUG_ON(assoc_array_ptr_is_meta(p));
+                               while (new_n->slots[next_slot])
+                                       next_slot++;
+                               BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
+                               new_n->slots[next_slot++] = p;
+                               nr_free--;
+                       }
+                       kfree(child);
+               } else {
+                       pr_devel("[%d] retain node %lu/%d [nx %d]\n",
+                                slot, child->nr_leaves_on_branch, nr_free + 1,
+                                next_slot);
+               }
+       }
+
+       pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
+
+       nr_leaves_on_tree = new_n->nr_leaves_on_branch;
+
+       /* Excise this node if it is singly occupied by a shortcut */
+       if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
+               for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
+                       if ((ptr = new_n->slots[slot]))
+                               break;
+
+               if (assoc_array_ptr_is_meta(ptr) &&
+                   assoc_array_ptr_is_shortcut(ptr)) {
+                       pr_devel("excise node %p with 1 shortcut\n", new_n);
+                       new_s = assoc_array_ptr_to_shortcut(ptr);
+                       new_parent = new_n->back_pointer;
+                       slot = new_n->parent_slot;
+                       kfree(new_n);
+                       if (!new_parent) {
+                               new_s->back_pointer = NULL;
+                               new_s->parent_slot = 0;
+                               new_root = ptr;
+                               goto gc_complete;
+                       }
+
+                       if (assoc_array_ptr_is_shortcut(new_parent)) {
+                               /* We can discard any preceding shortcut also */
+                               struct assoc_array_shortcut *s =
+                                       assoc_array_ptr_to_shortcut(new_parent);
+
+                               pr_devel("excise preceding shortcut\n");
+
+                               new_parent = new_s->back_pointer = s->back_pointer;
+                               slot = new_s->parent_slot = s->parent_slot;
+                               kfree(s);
+                               if (!new_parent) {
+                                       new_s->back_pointer = NULL;
+                                       new_s->parent_slot = 0;
+                                       new_root = ptr;
+                                       goto gc_complete;
+                               }
+                       }
+
+                       new_s->back_pointer = new_parent;
+                       new_s->parent_slot = slot;
+                       new_n = assoc_array_ptr_to_node(new_parent);
+                       new_n->slots[slot] = ptr;
+                       goto ascend_old_tree;
+               }
+       }
+
+       /* Excise any shortcuts we might encounter that point to nodes that
+        * only contain leaves.
+        */
+       ptr = new_n->back_pointer;
+       if (!ptr)
+               goto gc_complete;
+
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               new_s = assoc_array_ptr_to_shortcut(ptr);
+               new_parent = new_s->back_pointer;
+               slot = new_s->parent_slot;
+
+               if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
+                       struct assoc_array_node *n;
+
+                       pr_devel("excise shortcut\n");
+                       new_n->back_pointer = new_parent;
+                       new_n->parent_slot = slot;
+                       kfree(new_s);
+                       if (!new_parent) {
+                               new_root = assoc_array_node_to_ptr(new_n);
+                               goto gc_complete;
+                       }
+
+                       n = assoc_array_ptr_to_node(new_parent);
+                       n->slots[slot] = assoc_array_node_to_ptr(new_n);
+               }
+       } else {
+               new_parent = ptr;
+       }
+       new_n = assoc_array_ptr_to_node(new_parent);
+
+ascend_old_tree:
+       ptr = node->back_pointer;
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               slot = shortcut->parent_slot;
+               cursor = shortcut->back_pointer;
+       } else {
+               slot = node->parent_slot;
+               cursor = ptr;
+       }
+       BUG_ON(!ptr);
+       node = assoc_array_ptr_to_node(cursor);
+       slot++;
+       goto continue_node;
+
+gc_complete:
+       edit->set[0].to = new_root;
+       assoc_array_apply_edit(edit);
+       edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
+       return 0;
+
+enomem:
+       pr_devel("enomem\n");
+       assoc_array_destroy_subtree(new_root, edit->ops);
+       kfree(edit);
+       return -ENOMEM;
+}
index 657979f71bef0a0b3331eb804c0921d1f7afe6a8..bf076d281d4045da0b4b780ecd19a19db3987485 100644 (file)
@@ -121,3 +121,6 @@ void mpi_free(MPI a)
        kfree(a);
 }
 EXPORT_SYMBOL_GPL(mpi_free);
+
+MODULE_DESCRIPTION("Multiprecision maths library");
+MODULE_LICENSE("GPL");
index 7d57af21f49e920776979dfe11023780ebff6afb..dee6cf4e6d34135e1880c5c01c7627aa1a33c69a 100644 (file)
@@ -476,40 +476,6 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg)
        return 0;
 }
 
-static void copy_gigantic_page(struct page *dst, struct page *src)
-{
-       int i;
-       struct hstate *h = page_hstate(src);
-       struct page *dst_base = dst;
-       struct page *src_base = src;
-
-       for (i = 0; i < pages_per_huge_page(h); ) {
-               cond_resched();
-               copy_highpage(dst, src);
-
-               i++;
-               dst = mem_map_next(dst, dst_base, i);
-               src = mem_map_next(src, src_base, i);
-       }
-}
-
-void copy_huge_page(struct page *dst, struct page *src)
-{
-       int i;
-       struct hstate *h = page_hstate(src);
-
-       if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
-               copy_gigantic_page(dst, src);
-               return;
-       }
-
-       might_sleep();
-       for (i = 0; i < pages_per_huge_page(h); i++) {
-               cond_resched();
-               copy_highpage(dst + i, src + i);
-       }
-}
-
 static void enqueue_huge_page(struct hstate *h, struct page *page)
 {
        int nid = page_to_nid(page);
@@ -736,6 +702,23 @@ int PageHuge(struct page *page)
 }
 EXPORT_SYMBOL_GPL(PageHuge);
 
+/*
+ * PageHeadHuge() only returns true for hugetlbfs head page, but not for
+ * normal or transparent huge pages.
+ */
+int PageHeadHuge(struct page *page_head)
+{
+       compound_page_dtor *dtor;
+
+       if (!PageHead(page_head))
+               return 0;
+
+       dtor = get_compound_page_dtor(page_head);
+
+       return dtor == free_huge_page;
+}
+EXPORT_SYMBOL_GPL(PageHeadHuge);
+
 pgoff_t __basepage_index(struct page *page)
 {
        struct page *page_head = compound_head(page);
index 0409e8f43fa0f3719fadf3f19afda45164bbaa94..5d9025f3b3e1cd65bd97655ee95d6cd2f390ce5b 100644 (file)
@@ -4272,13 +4272,6 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 #if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
-static struct kmem_cache *page_ptl_cachep;
-void __init ptlock_cache_init(void)
-{
-       page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0,
-                       SLAB_PANIC, NULL);
-}
-
 bool ptlock_alloc(struct page *page)
 {
        spinlock_t *ptl;
index c4403cdf3433ddc79515dac25e5eedd0b39fa320..eca4a3129129751208b41cfe808e9e31e5dc7b5f 100644 (file)
@@ -2950,7 +2950,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
                return;
        }
 
-       p += snprintf(p, maxlen, policy_modes[mode]);
+       p += snprintf(p, maxlen, "%s", policy_modes[mode]);
 
        if (flags & MPOL_MODE_FLAGS) {
                p += snprintf(p, buffer + maxlen - p, "=");
index 316e720a2023c260b0205cdd2cd6be7d257986ac..bb940045fe8595842ed58f2e32f87b83d40485e1 100644 (file)
@@ -441,6 +441,54 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
        return MIGRATEPAGE_SUCCESS;
 }
 
+/*
+ * Gigantic pages are so large that we do not guarantee that page++ pointer
+ * arithmetic will work across the entire page.  We need something more
+ * specialized.
+ */
+static void __copy_gigantic_page(struct page *dst, struct page *src,
+                               int nr_pages)
+{
+       int i;
+       struct page *dst_base = dst;
+       struct page *src_base = src;
+
+       for (i = 0; i < nr_pages; ) {
+               cond_resched();
+               copy_highpage(dst, src);
+
+               i++;
+               dst = mem_map_next(dst, dst_base, i);
+               src = mem_map_next(src, src_base, i);
+       }
+}
+
+static void copy_huge_page(struct page *dst, struct page *src)
+{
+       int i;
+       int nr_pages;
+
+       if (PageHuge(src)) {
+               /* hugetlbfs page */
+               struct hstate *h = page_hstate(src);
+               nr_pages = pages_per_huge_page(h);
+
+               if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+                       __copy_gigantic_page(dst, src, nr_pages);
+                       return;
+               }
+       } else {
+               /* thp page */
+               BUG_ON(!PageTransHuge(src));
+               nr_pages = hpage_nr_pages(src);
+       }
+
+       for (i = 0; i < nr_pages; i++) {
+               cond_resched();
+               copy_highpage(dst + i, src + i);
+       }
+}
+
 /*
  * Copy the page to its new location
  */
index 0c8967bb201878e567c4d949ae1d9ee57cf04d0b..eb043bf05f4c57687c0644f11c12bd6b3a654795 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
  */
 static bool pfmemalloc_active __read_mostly;
 
-/*
- * kmem_bufctl_t:
- *
- * Bufctl's are used for linking objs within a slab
- * linked offsets.
- *
- * This implementation relies on "struct page" for locating the cache &
- * slab an object belongs to.
- * This allows the bufctl structure to be small (one int), but limits
- * the number of objects a slab (not a cache) can contain when off-slab
- * bufctls are used. The limit is the size of the largest general cache
- * that does not use off-slab slabs.
- * For 32bit archs with 4 kB pages, is this 56.
- * This is not serious, as it is only for large objects, when it is unwise
- * to have too many per slab.
- * Note: This limit can be raised by introducing a general cache whose size
- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
- */
-
-typedef unsigned int kmem_bufctl_t;
-#define BUFCTL_END     (((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE    (((kmem_bufctl_t)(~0U))-1)
-#define        BUFCTL_ACTIVE   (((kmem_bufctl_t)(~0U))-2)
-#define        SLAB_LIMIT      (((kmem_bufctl_t)(~0U))-3)
-
-/*
- * struct slab_rcu
- *
- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
- * arrange for kmem_freepages to be called via RCU.  This is useful if
- * we need to approach a kernel structure obliquely, from its address
- * obtained without the usual locking.  We can lock the structure to
- * stabilize it and check it's still at the given address, only if we
- * can be sure that the memory has not been meanwhile reused for some
- * other kind of object (which our subsystem's lock might corrupt).
- *
- * rcu_read_lock before reading the address, then rcu_read_unlock after
- * taking the spinlock within the structure expected at that address.
- */
-struct slab_rcu {
-       struct rcu_head head;
-       struct kmem_cache *cachep;
-       void *addr;
-};
-
-/*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-       union {
-               struct {
-                       struct list_head list;
-                       unsigned long colouroff;
-                       void *s_mem;            /* including colour offset */
-                       unsigned int inuse;     /* num of objs active in slab */
-                       kmem_bufctl_t free;
-                       unsigned short nodeid;
-               };
-               struct slab_rcu __slab_cover_slab_rcu;
-       };
-};
-
 /*
  * struct array_cache
  *
@@ -456,18 +390,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj)
        return page->slab_cache;
 }
 
-static inline struct slab *virt_to_slab(const void *obj)
-{
-       struct page *page = virt_to_head_page(obj);
-
-       VM_BUG_ON(!PageSlab(page));
-       return page->slab_page;
-}
-
-static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
+static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
                                 unsigned int idx)
 {
-       return slab->s_mem + cache->size * idx;
+       return page->s_mem + cache->size * idx;
 }
 
 /*
@@ -477,9 +403,9 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
  *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
  */
 static inline unsigned int obj_to_index(const struct kmem_cache *cache,
-                                       const struct slab *slab, void *obj)
+                                       const struct page *page, void *obj)
 {
-       u32 offset = (obj - slab->s_mem);
+       u32 offset = (obj - page->s_mem);
        return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
@@ -641,7 +567,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 
 static size_t slab_mgmt_size(size_t nr_objs, size_t align)
 {
-       return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
+       return ALIGN(nr_objs * sizeof(unsigned int), align);
 }
 
 /*
@@ -660,8 +586,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
         * on it. For the latter case, the memory allocated for a
         * slab is used for:
         *
-        * - The struct slab
-        * - One kmem_bufctl_t for each object
+        * - One unsigned int for each object
         * - Padding to respect alignment of @align
         * - @buffer_size bytes for each object
         *
@@ -674,8 +599,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                mgmt_size = 0;
                nr_objs = slab_size / buffer_size;
 
-               if (nr_objs > SLAB_LIMIT)
-                       nr_objs = SLAB_LIMIT;
        } else {
                /*
                 * Ignore padding for the initial guess. The padding
@@ -685,8 +608,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                 * into the memory allocation when taking the padding
                 * into account.
                 */
-               nr_objs = (slab_size - sizeof(struct slab)) /
-                         (buffer_size + sizeof(kmem_bufctl_t));
+               nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int));
 
                /*
                 * This calculated number will be either the right
@@ -696,9 +618,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                       > slab_size)
                        nr_objs--;
 
-               if (nr_objs > SLAB_LIMIT)
-                       nr_objs = SLAB_LIMIT;
-
                mgmt_size = slab_mgmt_size(nr_objs, align);
        }
        *num = nr_objs;
@@ -829,10 +748,8 @@ static struct array_cache *alloc_arraycache(int node, int entries,
        return nc;
 }
 
-static inline bool is_slab_pfmemalloc(struct slab *slabp)
+static inline bool is_slab_pfmemalloc(struct page *page)
 {
-       struct page *page = virt_to_page(slabp->s_mem);
-
        return PageSlabPfmemalloc(page);
 }
 
@@ -841,23 +758,23 @@ static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
                                                struct array_cache *ac)
 {
        struct kmem_cache_node *n = cachep->node[numa_mem_id()];
-       struct slab *slabp;
+       struct page *page;
        unsigned long flags;
 
        if (!pfmemalloc_active)
                return;
 
        spin_lock_irqsave(&n->list_lock, flags);
-       list_for_each_entry(slabp, &n->slabs_full, list)
-               if (is_slab_pfmemalloc(slabp))
+       list_for_each_entry(page, &n->slabs_full, lru)
+               if (is_slab_pfmemalloc(page))
                        goto out;
 
-       list_for_each_entry(slabp, &n->slabs_partial, list)
-               if (is_slab_pfmemalloc(slabp))
+       list_for_each_entry(page, &n->slabs_partial, lru)
+               if (is_slab_pfmemalloc(page))
                        goto out;
 
-       list_for_each_entry(slabp, &n->slabs_free, list)
-               if (is_slab_pfmemalloc(slabp))
+       list_for_each_entry(page, &n->slabs_free, lru)
+               if (is_slab_pfmemalloc(page))
                        goto out;
 
        pfmemalloc_active = false;
@@ -897,8 +814,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
                 */
                n = cachep->node[numa_mem_id()];
                if (!list_empty(&n->slabs_free) && force_refill) {
-                       struct slab *slabp = virt_to_slab(objp);
-                       ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
+                       struct page *page = virt_to_head_page(objp);
+                       ClearPageSlabPfmemalloc(page);
                        clear_obj_pfmemalloc(&objp);
                        recheck_pfmemalloc_active(cachep, ac);
                        return objp;
@@ -1099,8 +1016,7 @@ static void drain_alien_cache(struct kmem_cache *cachep,
 
 static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 {
-       struct slab *slabp = virt_to_slab(objp);
-       int nodeid = slabp->nodeid;
+       int nodeid = page_to_nid(virt_to_page(objp));
        struct kmem_cache_node *n;
        struct array_cache *alien = NULL;
        int node;
@@ -1111,7 +1027,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
         * Make sure we are not freeing a object from another node to the array
         * cache on this cpu.
         */
-       if (likely(slabp->nodeid == node))
+       if (likely(nodeid == node))
                return 0;
 
        n = cachep->node[node];
@@ -1512,6 +1428,8 @@ void __init kmem_cache_init(void)
 {
        int i;
 
+       BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
+                                       sizeof(struct rcu_head));
        kmem_cache = &kmem_cache_boot;
        setup_node_pointer(kmem_cache);
 
@@ -1687,7 +1605,7 @@ static noinline void
 slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 {
        struct kmem_cache_node *n;
-       struct slab *slabp;
+       struct page *page;
        unsigned long flags;
        int node;
 
@@ -1706,15 +1624,15 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
                        continue;
 
                spin_lock_irqsave(&n->list_lock, flags);
-               list_for_each_entry(slabp, &n->slabs_full, list) {
+               list_for_each_entry(page, &n->slabs_full, lru) {
                        active_objs += cachep->num;
                        active_slabs++;
                }
-               list_for_each_entry(slabp, &n->slabs_partial, list) {
-                       active_objs += slabp->inuse;
+               list_for_each_entry(page, &n->slabs_partial, lru) {
+                       active_objs += page->active;
                        active_slabs++;
                }
-               list_for_each_entry(slabp, &n->slabs_free, list)
+               list_for_each_entry(page, &n->slabs_free, lru)
                        num_slabs++;
 
                free_objects += n->free_objects;
@@ -1736,19 +1654,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
  * did not request dmaable memory, we might get it, but that
  * would be relatively rare and ignorable.
  */
-static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
+                                                               int nodeid)
 {
        struct page *page;
        int nr_pages;
-       int i;
-
-#ifndef CONFIG_MMU
-       /*
-        * Nommu uses slab's for process anonymous memory allocations, and thus
-        * requires __GFP_COMP to properly refcount higher order allocations
-        */
-       flags |= __GFP_COMP;
-#endif
 
        flags |= cachep->allocflags;
        if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1772,12 +1682,9 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
        else
                add_zone_page_state(page_zone(page),
                        NR_SLAB_UNRECLAIMABLE, nr_pages);
-       for (i = 0; i < nr_pages; i++) {
-               __SetPageSlab(page + i);
-
-               if (page->pfmemalloc)
-                       SetPageSlabPfmemalloc(page + i);
-       }
+       __SetPageSlab(page);
+       if (page->pfmemalloc)
+               SetPageSlabPfmemalloc(page);
        memcg_bind_pages(cachep, cachep->gfporder);
 
        if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
@@ -1789,17 +1696,15 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                        kmemcheck_mark_unallocated_pages(page, nr_pages);
        }
 
-       return page_address(page);
+       return page;
 }
 
 /*
  * Interface to system's page release.
  */
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
 {
-       unsigned long i = (1 << cachep->gfporder);
-       struct page *page = virt_to_page(addr);
-       const unsigned long nr_freed = i;
+       const unsigned long nr_freed = (1 << cachep->gfporder);
 
        kmemcheck_free_shadow(page, cachep->gfporder);
 
@@ -1809,27 +1714,28 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
        else
                sub_zone_page_state(page_zone(page),
                                NR_SLAB_UNRECLAIMABLE, nr_freed);
-       while (i--) {
-               BUG_ON(!PageSlab(page));
-               __ClearPageSlabPfmemalloc(page);
-               __ClearPageSlab(page);
-               page++;
-       }
+
+       BUG_ON(!PageSlab(page));
+       __ClearPageSlabPfmemalloc(page);
+       __ClearPageSlab(page);
+       page_mapcount_reset(page);
+       page->mapping = NULL;
 
        memcg_release_pages(cachep, cachep->gfporder);
        if (current->reclaim_state)
                current->reclaim_state->reclaimed_slab += nr_freed;
-       free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
+       __free_memcg_kmem_pages(page, cachep->gfporder);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
 {
-       struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
-       struct kmem_cache *cachep = slab_rcu->cachep;
+       struct kmem_cache *cachep;
+       struct page *page;
 
-       kmem_freepages(cachep, slab_rcu->addr);
-       if (OFF_SLAB(cachep))
-               kmem_cache_free(cachep->slabp_cache, slab_rcu);
+       page = container_of(head, struct page, rcu_head);
+       cachep = page->slab_cache;
+
+       kmem_freepages(cachep, page);
 }
 
 #if DEBUG
@@ -1978,19 +1884,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
                /* Print some data about the neighboring objects, if they
                 * exist:
                 */
-               struct slab *slabp = virt_to_slab(objp);
+               struct page *page = virt_to_head_page(objp);
                unsigned int objnr;
 
-               objnr = obj_to_index(cachep, slabp, objp);
+               objnr = obj_to_index(cachep, page, objp);
                if (objnr) {
-                       objp = index_to_obj(cachep, slabp, objnr - 1);
+                       objp = index_to_obj(cachep, page, objnr - 1);
                        realobj = (char *)objp + obj_offset(cachep);
                        printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
                               realobj, size);
                        print_objinfo(cachep, objp, 2);
                }
                if (objnr + 1 < cachep->num) {
-                       objp = index_to_obj(cachep, slabp, objnr + 1);
+                       objp = index_to_obj(cachep, page, objnr + 1);
                        realobj = (char *)objp + obj_offset(cachep);
                        printk(KERN_ERR "Next obj: start=%p, len=%d\n",
                               realobj, size);
@@ -2001,11 +1907,12 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 #endif
 
 #if DEBUG
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+                                               struct page *page)
 {
        int i;
        for (i = 0; i < cachep->num; i++) {
-               void *objp = index_to_obj(cachep, slabp, i);
+               void *objp = index_to_obj(cachep, page, i);
 
                if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -2030,7 +1937,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
        }
 }
 #else
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+                                               struct page *page)
 {
 }
 #endif
@@ -2044,23 +1952,34 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
  * Before calling the slab must have been unlinked from the cache.  The
  * cache-lock is not held/needed.
  */
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct page *page)
 {
-       void *addr = slabp->s_mem - slabp->colouroff;
+       void *freelist;
 
-       slab_destroy_debugcheck(cachep, slabp);
+       freelist = page->freelist;
+       slab_destroy_debugcheck(cachep, page);
        if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
-               struct slab_rcu *slab_rcu;
+               struct rcu_head *head;
+
+               /*
+                * RCU free overloads the RCU head over the LRU.
+                * slab_page has been overloaded over the LRU,
+                * however it is not used from now on so that
+                * we can use it safely.
+                */
+               head = (void *)&page->rcu_head;
+               call_rcu(head, kmem_rcu_free);
 
-               slab_rcu = (struct slab_rcu *)slabp;
-               slab_rcu->cachep = cachep;
-               slab_rcu->addr = addr;
-               call_rcu(&slab_rcu->head, kmem_rcu_free);
        } else {
-               kmem_freepages(cachep, addr);
-               if (OFF_SLAB(cachep))
-                       kmem_cache_free(cachep->slabp_cache, slabp);
+               kmem_freepages(cachep, page);
        }
+
+       /*
+        * The freelist is not used from here on, so it can be released
+        * now even though the page itself may be freed later in RCU context.
+        */
+       if (OFF_SLAB(cachep))
+               kmem_cache_free(cachep->freelist_cache, freelist);
 }
 
 /**
@@ -2097,8 +2016,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
                         * use off-slab slabs. Needed to avoid a possible
                         * looping condition in cache_grow().
                         */
-                       offslab_limit = size - sizeof(struct slab);
-                       offslab_limit /= sizeof(kmem_bufctl_t);
+                       offslab_limit = size;
+                       offslab_limit /= sizeof(unsigned int);
 
                        if (num > offslab_limit)
                                break;
@@ -2220,7 +2139,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 int
 __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 {
-       size_t left_over, slab_size, ralign;
+       size_t left_over, freelist_size, ralign;
        gfp_t gfp;
        int err;
        size_t size = cachep->size;
@@ -2339,22 +2258,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
        if (!cachep->num)
                return -E2BIG;
 
-       slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
-                         + sizeof(struct slab), cachep->align);
+       freelist_size =
+               ALIGN(cachep->num * sizeof(unsigned int), cachep->align);
 
        /*
         * If the slab has been placed off-slab, and we have enough space then
         * move it on-slab. This is at the expense of any extra colouring.
         */
-       if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
+       if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) {
                flags &= ~CFLGS_OFF_SLAB;
-               left_over -= slab_size;
+               left_over -= freelist_size;
        }
 
        if (flags & CFLGS_OFF_SLAB) {
                /* really off slab. No need for manual alignment */
-               slab_size =
-                   cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
+               freelist_size = cachep->num * sizeof(unsigned int);
 
 #ifdef CONFIG_PAGE_POISONING
                /* If we're going to use the generic kernel_map_pages()
@@ -2371,16 +2289,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
        if (cachep->colour_off < cachep->align)
                cachep->colour_off = cachep->align;
        cachep->colour = left_over / cachep->colour_off;
-       cachep->slab_size = slab_size;
+       cachep->freelist_size = freelist_size;
        cachep->flags = flags;
-       cachep->allocflags = 0;
+       cachep->allocflags = __GFP_COMP;
        if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
                cachep->allocflags |= GFP_DMA;
        cachep->size = size;
        cachep->reciprocal_buffer_size = reciprocal_value(size);
 
        if (flags & CFLGS_OFF_SLAB) {
-               cachep->slabp_cache = kmalloc_slab(slab_size, 0u);
+               cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
                /*
                 * This is a possibility for one of the malloc_sizes caches.
                 * But since we go off slab only for object size greater than
@@ -2388,7 +2306,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                 * this should not happen at all.
                 * But leave a BUG_ON for some lucky dude.
                 */
-               BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
+               BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache));
        }
 
        err = setup_cpu_cache(cachep, gfp);
@@ -2494,7 +2412,7 @@ static int drain_freelist(struct kmem_cache *cache,
 {
        struct list_head *p;
        int nr_freed;
-       struct slab *slabp;
+       struct page *page;
 
        nr_freed = 0;
        while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
@@ -2506,18 +2424,18 @@ static int drain_freelist(struct kmem_cache *cache,
                        goto out;
                }
 
-               slabp = list_entry(p, struct slab, list);
+               page = list_entry(p, struct page, lru);
 #if DEBUG
-               BUG_ON(slabp->inuse);
+               BUG_ON(page->active);
 #endif
-               list_del(&slabp->list);
+               list_del(&page->lru);
                /*
                 * Safe to drop the lock. The slab is no longer linked
                 * to the cache.
                 */
                n->free_objects -= cache->num;
                spin_unlock_irq(&n->list_lock);
-               slab_destroy(cache, slabp);
+               slab_destroy(cache, page);
                nr_freed++;
        }
 out:
@@ -2600,52 +2518,42 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
  * descriptors in kmem_cache_create, we search through the malloc_sizes array.
  * If we are creating a malloc_sizes cache here it would not be visible to
  * kmem_find_general_cachep till the initialization is complete.
- * Hence we cannot have slabp_cache same as the original cache.
+ * Hence we cannot have freelist_cache same as the original cache.
  */
-static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
-                                  int colour_off, gfp_t local_flags,
-                                  int nodeid)
+static void *alloc_slabmgmt(struct kmem_cache *cachep,
+                                  struct page *page, int colour_off,
+                                  gfp_t local_flags, int nodeid)
 {
-       struct slab *slabp;
+       void *freelist;
+       void *addr = page_address(page);
 
        if (OFF_SLAB(cachep)) {
                /* Slab management obj is off-slab. */
-               slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+               freelist = kmem_cache_alloc_node(cachep->freelist_cache,
                                              local_flags, nodeid);
-               /*
-                * If the first object in the slab is leaked (it's allocated
-                * but no one has a reference to it), we want to make sure
-                * kmemleak does not treat the ->s_mem pointer as a reference
-                * to the object. Otherwise we will not report the leak.
-                */
-               kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
-                                  local_flags);
-               if (!slabp)
+               if (!freelist)
                        return NULL;
        } else {
-               slabp = objp + colour_off;
-               colour_off += cachep->slab_size;
+               freelist = addr + colour_off;
+               colour_off += cachep->freelist_size;
        }
-       slabp->inuse = 0;
-       slabp->colouroff = colour_off;
-       slabp->s_mem = objp + colour_off;
-       slabp->nodeid = nodeid;
-       slabp->free = 0;
-       return slabp;
+       page->active = 0;
+       page->s_mem = addr + colour_off;
+       return freelist;
 }
 
-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+static inline unsigned int *slab_freelist(struct page *page)
 {
-       return (kmem_bufctl_t *) (slabp + 1);
+       return (unsigned int *)(page->freelist);
 }
 
 static void cache_init_objs(struct kmem_cache *cachep,
-                           struct slab *slabp)
+                           struct page *page)
 {
        int i;
 
        for (i = 0; i < cachep->num; i++) {
-               void *objp = index_to_obj(cachep, slabp, i);
+               void *objp = index_to_obj(cachep, page, i);
 #if DEBUG
                /* need to poison the objs? */
                if (cachep->flags & SLAB_POISON)
@@ -2681,9 +2589,8 @@ static void cache_init_objs(struct kmem_cache *cachep,
                if (cachep->ctor)
                        cachep->ctor(objp);
 #endif
-               slab_bufctl(slabp)[i] = i + 1;
+               slab_freelist(page)[i] = i;
        }
-       slab_bufctl(slabp)[i - 1] = BUFCTL_END;
 }
 
 static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
@@ -2696,41 +2603,41 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
        }
 }
 
-static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
                                int nodeid)
 {
-       void *objp = index_to_obj(cachep, slabp, slabp->free);
-       kmem_bufctl_t next;
+       void *objp;
 
-       slabp->inuse++;
-       next = slab_bufctl(slabp)[slabp->free];
+       objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]);
+       page->active++;
 #if DEBUG
-       slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
-       WARN_ON(slabp->nodeid != nodeid);
+       WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
 #endif
-       slabp->free = next;
 
        return objp;
 }
 
-static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
                                void *objp, int nodeid)
 {
-       unsigned int objnr = obj_to_index(cachep, slabp, objp);
-
+       unsigned int objnr = obj_to_index(cachep, page, objp);
 #if DEBUG
+       unsigned int i;
+
        /* Verify that the slab belongs to the intended node */
-       WARN_ON(slabp->nodeid != nodeid);
+       WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
 
-       if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
-               printk(KERN_ERR "slab: double free detected in cache "
-                               "'%s', objp %p\n", cachep->name, objp);
-               BUG();
+       /* Check for a double free: objnr must not already be on the freelist */
+       for (i = page->active; i < cachep->num; i++) {
+               if (slab_freelist(page)[i] == objnr) {
+                       printk(KERN_ERR "slab: double free detected in cache "
+                                       "'%s', objp %p\n", cachep->name, objp);
+                       BUG();
+               }
        }
 #endif
-       slab_bufctl(slabp)[objnr] = slabp->free;
-       slabp->free = objnr;
-       slabp->inuse--;
+       page->active--;
+       slab_freelist(page)[page->active] = objnr;
 }
 
 /*
@@ -2738,23 +2645,11 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
  * for the slab allocator to be able to lookup the cache and slab of a
  * virtual address for kfree, ksize, and slab debugging.
  */
-static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
-                          void *addr)
+static void slab_map_pages(struct kmem_cache *cache, struct page *page,
+                          void *freelist)
 {
-       int nr_pages;
-       struct page *page;
-
-       page = virt_to_page(addr);
-
-       nr_pages = 1;
-       if (likely(!PageCompound(page)))
-               nr_pages <<= cache->gfporder;
-
-       do {
-               page->slab_cache = cache;
-               page->slab_page = slab;
-               page++;
-       } while (--nr_pages);
+       page->slab_cache = cache;
+       page->freelist = freelist;
 }
 
 /*
@@ -2762,9 +2657,9 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
 static int cache_grow(struct kmem_cache *cachep,
-               gfp_t flags, int nodeid, void *objp)
+               gfp_t flags, int nodeid, struct page *page)
 {
-       struct slab *slabp;
+       void *freelist;
        size_t offset;
        gfp_t local_flags;
        struct kmem_cache_node *n;
@@ -2805,20 +2700,20 @@ static int cache_grow(struct kmem_cache *cachep,
         * Get mem for the objs.  Attempt to allocate a physical page from
         * 'nodeid'.
         */
-       if (!objp)
-               objp = kmem_getpages(cachep, local_flags, nodeid);
-       if (!objp)
+       if (!page)
+               page = kmem_getpages(cachep, local_flags, nodeid);
+       if (!page)
                goto failed;
 
        /* Get slab management. */
-       slabp = alloc_slabmgmt(cachep, objp, offset,
+       freelist = alloc_slabmgmt(cachep, page, offset,
                        local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
-       if (!slabp)
+       if (!freelist)
                goto opps1;
 
-       slab_map_pages(cachep, slabp, objp);
+       slab_map_pages(cachep, page, freelist);
 
-       cache_init_objs(cachep, slabp);
+       cache_init_objs(cachep, page);
 
        if (local_flags & __GFP_WAIT)
                local_irq_disable();
@@ -2826,13 +2721,13 @@ static int cache_grow(struct kmem_cache *cachep,
        spin_lock(&n->list_lock);
 
        /* Make slab active. */
-       list_add_tail(&slabp->list, &(n->slabs_free));
+       list_add_tail(&page->lru, &(n->slabs_free));
        STATS_INC_GROWN(cachep);
        n->free_objects += cachep->num;
        spin_unlock(&n->list_lock);
        return 1;
 opps1:
-       kmem_freepages(cachep, objp);
+       kmem_freepages(cachep, page);
 failed:
        if (local_flags & __GFP_WAIT)
                local_irq_disable();
@@ -2880,9 +2775,8 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
 static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
                                   unsigned long caller)
 {
-       struct page *page;
        unsigned int objnr;
-       struct slab *slabp;
+       struct page *page;
 
        BUG_ON(virt_to_cache(objp) != cachep);
 
@@ -2890,8 +2784,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
        kfree_debugcheck(objp);
        page = virt_to_head_page(objp);
 
-       slabp = page->slab_page;
-
        if (cachep->flags & SLAB_RED_ZONE) {
                verify_redzone_free(cachep, objp);
                *dbg_redzone1(cachep, objp) = RED_INACTIVE;
@@ -2900,14 +2792,11 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
        if (cachep->flags & SLAB_STORE_USER)
                *dbg_userword(cachep, objp) = (void *)caller;
 
-       objnr = obj_to_index(cachep, slabp, objp);
+       objnr = obj_to_index(cachep, page, objp);
 
        BUG_ON(objnr >= cachep->num);
-       BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
+       BUG_ON(objp != index_to_obj(cachep, page, objnr));
 
-#ifdef CONFIG_DEBUG_SLAB_LEAK
-       slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
-#endif
        if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
                if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
@@ -2924,33 +2813,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
        return objp;
 }
 
-static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
-{
-       kmem_bufctl_t i;
-       int entries = 0;
-
-       /* Check slab's freelist to see if this obj is there. */
-       for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
-               entries++;
-               if (entries > cachep->num || i >= cachep->num)
-                       goto bad;
-       }
-       if (entries != cachep->num - slabp->inuse) {
-bad:
-               printk(KERN_ERR "slab: Internal list corruption detected in "
-                       "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
-                       cachep->name, cachep->num, slabp, slabp->inuse,
-                       print_tainted());
-               print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
-                       sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
-                       1);
-               BUG();
-       }
-}
 #else
 #define kfree_debugcheck(x) do { } while(0)
 #define cache_free_debugcheck(x,objp,z) (objp)
-#define check_slabp(x,y) do { } while(0)
 #endif
 
 static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
@@ -2989,7 +2854,7 @@ retry:
 
        while (batchcount > 0) {
                struct list_head *entry;
-               struct slab *slabp;
+               struct page *page;
                /* Get slab alloc is to come from. */
                entry = n->slabs_partial.next;
                if (entry == &n->slabs_partial) {
@@ -2999,8 +2864,7 @@ retry:
                                goto must_grow;
                }
 
-               slabp = list_entry(entry, struct slab, list);
-               check_slabp(cachep, slabp);
+               page = list_entry(entry, struct page, lru);
                check_spinlock_acquired(cachep);
 
                /*
@@ -3008,24 +2872,23 @@ retry:
                 * there must be at least one object available for
                 * allocation.
                 */
-               BUG_ON(slabp->inuse >= cachep->num);
+               BUG_ON(page->active >= cachep->num);
 
-               while (slabp->inuse < cachep->num && batchcount--) {
+               while (page->active < cachep->num && batchcount--) {
                        STATS_INC_ALLOCED(cachep);
                        STATS_INC_ACTIVE(cachep);
                        STATS_SET_HIGH(cachep);
 
-                       ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+                       ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
                                                                        node));
                }
-               check_slabp(cachep, slabp);
 
                /* move slabp to correct slabp list: */
-               list_del(&slabp->list);
-               if (slabp->free == BUFCTL_END)
-                       list_add(&slabp->list, &n->slabs_full);
+               list_del(&page->lru);
+               if (page->active == cachep->num)
+                       list_add(&page->lru, &n->slabs_full);
                else
-                       list_add(&slabp->list, &n->slabs_partial);
+                       list_add(&page->lru, &n->slabs_partial);
        }
 
 must_grow:
@@ -3097,16 +2960,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                *dbg_redzone1(cachep, objp) = RED_ACTIVE;
                *dbg_redzone2(cachep, objp) = RED_ACTIVE;
        }
-#ifdef CONFIG_DEBUG_SLAB_LEAK
-       {
-               struct slab *slabp;
-               unsigned objnr;
-
-               slabp = virt_to_head_page(objp)->slab_page;
-               objnr = (unsigned)(objp - slabp->s_mem) / cachep->size;
-               slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
-       }
-#endif
        objp += obj_offset(cachep);
        if (cachep->ctor && cachep->flags & SLAB_POISON)
                cachep->ctor(objp);
@@ -3248,18 +3101,20 @@ retry:
                 * We may trigger various forms of reclaim on the allowed
                 * set and go into memory reserves if necessary.
                 */
+               struct page *page;
+
                if (local_flags & __GFP_WAIT)
                        local_irq_enable();
                kmem_flagcheck(cache, flags);
-               obj = kmem_getpages(cache, local_flags, numa_mem_id());
+               page = kmem_getpages(cache, local_flags, numa_mem_id());
                if (local_flags & __GFP_WAIT)
                        local_irq_disable();
-               if (obj) {
+               if (page) {
                        /*
                         * Insert into the appropriate per node queues
                         */
-                       nid = page_to_nid(virt_to_page(obj));
-                       if (cache_grow(cache, flags, nid, obj)) {
+                       nid = page_to_nid(page);
+                       if (cache_grow(cache, flags, nid, page)) {
                                obj = ____cache_alloc_node(cache,
                                        flags | GFP_THISNODE, nid);
                                if (!obj)
@@ -3288,7 +3143,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
                                int nodeid)
 {
        struct list_head *entry;
-       struct slab *slabp;
+       struct page *page;
        struct kmem_cache_node *n;
        void *obj;
        int x;
@@ -3308,26 +3163,24 @@ retry:
                        goto must_grow;
        }
 
-       slabp = list_entry(entry, struct slab, list);
+       page = list_entry(entry, struct page, lru);
        check_spinlock_acquired_node(cachep, nodeid);
-       check_slabp(cachep, slabp);
 
        STATS_INC_NODEALLOCS(cachep);
        STATS_INC_ACTIVE(cachep);
        STATS_SET_HIGH(cachep);
 
-       BUG_ON(slabp->inuse == cachep->num);
+       BUG_ON(page->active == cachep->num);
 
-       obj = slab_get_obj(cachep, slabp, nodeid);
-       check_slabp(cachep, slabp);
+       obj = slab_get_obj(cachep, page, nodeid);
        n->free_objects--;
        /* move slabp to correct slabp list: */
-       list_del(&slabp->list);
+       list_del(&page->lru);
 
-       if (slabp->free == BUFCTL_END)
-               list_add(&slabp->list, &n->slabs_full);
+       if (page->active == cachep->num)
+               list_add(&page->lru, &n->slabs_full);
        else
-               list_add(&slabp->list, &n->slabs_partial);
+               list_add(&page->lru, &n->slabs_partial);
 
        spin_unlock(&n->list_lock);
        goto done;
@@ -3477,23 +3330,21 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 
        for (i = 0; i < nr_objects; i++) {
                void *objp;
-               struct slab *slabp;
+               struct page *page;
 
                clear_obj_pfmemalloc(&objpp[i]);
                objp = objpp[i];
 
-               slabp = virt_to_slab(objp);
+               page = virt_to_head_page(objp);
                n = cachep->node[node];
-               list_del(&slabp->list);
+               list_del(&page->lru);
                check_spinlock_acquired_node(cachep, node);
-               check_slabp(cachep, slabp);
-               slab_put_obj(cachep, slabp, objp, node);
+               slab_put_obj(cachep, page, objp, node);
                STATS_DEC_ACTIVE(cachep);
                n->free_objects++;
-               check_slabp(cachep, slabp);
 
                /* fixup slab chains */
-               if (slabp->inuse == 0) {
+               if (page->active == 0) {
                        if (n->free_objects > n->free_limit) {
                                n->free_objects -= cachep->num;
                                /* No need to drop any previously held
@@ -3502,16 +3353,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
                                 * a different cache, refer to comments before
                                 * alloc_slabmgmt.
                                 */
-                               slab_destroy(cachep, slabp);
+                               slab_destroy(cachep, page);
                        } else {
-                               list_add(&slabp->list, &n->slabs_free);
+                               list_add(&page->lru, &n->slabs_free);
                        }
                } else {
                        /* Unconditionally move a slab to the end of the
                         * partial list on free - maximum time for the
                         * other objects to be freed, too.
                         */
-                       list_add_tail(&slabp->list, &n->slabs_partial);
+                       list_add_tail(&page->lru, &n->slabs_partial);
                }
        }
 }
@@ -3551,10 +3402,10 @@ free_done:
 
                p = n->slabs_free.next;
                while (p != &(n->slabs_free)) {
-                       struct slab *slabp;
+                       struct page *page;
 
-                       slabp = list_entry(p, struct slab, list);
-                       BUG_ON(slabp->inuse);
+                       page = list_entry(p, struct page, lru);
+                       BUG_ON(page->active);
 
                        i++;
                        p = p->next;
@@ -4158,7 +4009,7 @@ out:
 #ifdef CONFIG_SLABINFO
 void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 {
-       struct slab *slabp;
+       struct page *page;
        unsigned long active_objs;
        unsigned long num_objs;
        unsigned long active_slabs = 0;
@@ -4178,23 +4029,23 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
                check_irq_on();
                spin_lock_irq(&n->list_lock);
 
-               list_for_each_entry(slabp, &n->slabs_full, list) {
-                       if (slabp->inuse != cachep->num && !error)
+               list_for_each_entry(page, &n->slabs_full, lru) {
+                       if (page->active != cachep->num && !error)
                                error = "slabs_full accounting error";
                        active_objs += cachep->num;
                        active_slabs++;
                }
-               list_for_each_entry(slabp, &n->slabs_partial, list) {
-                       if (slabp->inuse == cachep->num && !error)
-                               error = "slabs_partial inuse accounting error";
-                       if (!slabp->inuse && !error)
-                               error = "slabs_partial/inuse accounting error";
-                       active_objs += slabp->inuse;
+               list_for_each_entry(page, &n->slabs_partial, lru) {
+                       if (page->active == cachep->num && !error)
+                               error = "slabs_partial accounting error";
+                       if (!page->active && !error)
+                               error = "slabs_partial accounting error";
+                       active_objs += page->active;
                        active_slabs++;
                }
-               list_for_each_entry(slabp, &n->slabs_free, list) {
-                       if (slabp->inuse && !error)
-                               error = "slabs_free/inuse accounting error";
+               list_for_each_entry(page, &n->slabs_free, lru) {
+                       if (page->active && !error)
+                               error = "slabs_free accounting error";
                        num_slabs++;
                }
                free_objects += n->free_objects;
@@ -4346,15 +4197,27 @@ static inline int add_caller(unsigned long *n, unsigned long v)
        return 1;
 }
 
-static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
+static void handle_slab(unsigned long *n, struct kmem_cache *c,
+                                               struct page *page)
 {
        void *p;
-       int i;
+       int i, j;
+
        if (n[0] == n[1])
                return;
-       for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) {
-               if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
+       for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
+               bool active = true;
+
+               for (j = page->active; j < c->num; j++) {
+                       /* Skip freed item */
+                       if (slab_freelist(page)[j] == i) {
+                               active = false;
+                               break;
+                       }
+               }
+               if (!active)
                        continue;
+
                if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
                        return;
        }
@@ -4379,7 +4242,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
 static int leaks_show(struct seq_file *m, void *p)
 {
        struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
-       struct slab *slabp;
+       struct page *page;
        struct kmem_cache_node *n;
        const char *name;
        unsigned long *x = m->private;
@@ -4403,10 +4266,10 @@ static int leaks_show(struct seq_file *m, void *p)
                check_irq_on();
                spin_lock_irq(&n->list_lock);
 
-               list_for_each_entry(slabp, &n->slabs_full, list)
-                       handle_slab(x, cachep, slabp);
-               list_for_each_entry(slabp, &n->slabs_partial, list)
-                       handle_slab(x, cachep, slabp);
+               list_for_each_entry(page, &n->slabs_full, lru)
+                       handle_slab(x, cachep, page);
+               list_for_each_entry(page, &n->slabs_partial, lru)
+                       handle_slab(x, cachep, page);
                spin_unlock_irq(&n->list_lock);
        }
        name = cachep->name;
index 7e8bd8d828bc0e5c7e96919c6e24186d8d5382df..545a170ebf9f66cf0e3716c9cd6f4cb7eef0eda6 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -155,7 +155,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
 /*
  * Maximum number of desirable partial slabs.
  * The existence of more partial slabs makes kmem_cache_shrink
- * sort the partial list by the number of objects in the.
+ * sort the partial list by the number of objects in use.
  */
 #define MAX_PARTIAL 10
 
@@ -933,6 +933,16 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
  * Hooks for other subsystems that check memory allocations. In a typical
  * production configuration these hooks all should produce no code at all.
  */
+static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
+{
+       kmemleak_alloc(ptr, size, 1, flags);
+}
+
+static inline void kfree_hook(const void *x)
+{
+       kmemleak_free(x);
+}
+
 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
 {
        flags &= gfp_allowed_mask;
@@ -1217,8 +1227,8 @@ static unsigned long kmem_cache_flags(unsigned long object_size,
        /*
         * Enable debugging if selected on the kernel commandline.
         */
-       if (slub_debug && (!slub_debug_slabs ||
-               !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
+       if (slub_debug && (!slub_debug_slabs || (name &&
+               !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
                flags |= slub_debug;
 
        return flags;
@@ -1260,13 +1270,30 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
 static inline void dec_slabs_node(struct kmem_cache *s, int node,
                                                        int objects) {}
 
+static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
+{
+       kmemleak_alloc(ptr, size, 1, flags);
+}
+
+static inline void kfree_hook(const void *x)
+{
+       kmemleak_free(x);
+}
+
 static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
                                                        { return 0; }
 
 static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
-               void *object) {}
+               void *object)
+{
+       kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
+               flags & gfp_allowed_mask);
+}
 
-static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
+static inline void slab_free_hook(struct kmem_cache *s, void *x)
+{
+       kmemleak_free_recursive(x, s->flags);
+}
 
 #endif /* CONFIG_SLUB_DEBUG */
 
@@ -2829,8 +2856,8 @@ static struct kmem_cache *kmem_cache_node;
  * slab on the node for this slabcache. There are no concurrent accesses
  * possible.
  *
- * Note that this function only works on the kmalloc_node_cache
- * when allocating for the kmalloc_node_cache. This is used for bootstrapping
+ * Note that this function only works on the kmem_cache_node
+ * when allocating for the kmem_cache_node. This is used for bootstrapping
  * memory on a fresh node that has no slab structures yet.
  */
 static void early_kmem_cache_node_alloc(int node)
@@ -3272,7 +3299,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
        if (page)
                ptr = page_address(page);
 
-       kmemleak_alloc(ptr, size, 1, flags);
+       kmalloc_large_node_hook(ptr, size, flags);
        return ptr;
 }
 
@@ -3336,7 +3363,7 @@ void kfree(const void *x)
        page = virt_to_head_page(x);
        if (unlikely(!PageSlab(page))) {
                BUG_ON(!PageCompound(page));
-               kmemleak_free(x);
+               kfree_hook(x);
                __free_memcg_kmem_pages(page, compound_order(page));
                return;
        }
index 7a9f80d451f548cd749331bc6143354eab53814d..84b26aaabd03b6d236ba82b84c713844916fd521 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -82,19 +82,6 @@ static void __put_compound_page(struct page *page)
 
 static void put_compound_page(struct page *page)
 {
-       /*
-        * hugetlbfs pages cannot be split from under us.  If this is a
-        * hugetlbfs page, check refcount on head page and release the page if
-        * the refcount becomes zero.
-        */
-       if (PageHuge(page)) {
-               page = compound_head(page);
-               if (put_page_testzero(page))
-                       __put_compound_page(page);
-
-               return;
-       }
-
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
                struct page *page_head = compound_trans_head(page);
@@ -111,14 +98,31 @@ static void put_compound_page(struct page *page)
                         * still hot on arches that do not support
                         * this_cpu_cmpxchg_double().
                         */
-                       if (PageSlab(page_head)) {
-                               if (PageTail(page)) {
+                       if (PageSlab(page_head) || PageHeadHuge(page_head)) {
+                               if (likely(PageTail(page))) {
+                                       /*
+                                        * __split_huge_page_refcount
+                                        * cannot race here.
+                                        */
+                                       VM_BUG_ON(!PageHead(page_head));
+                                       atomic_dec(&page->_mapcount);
                                        if (put_page_testzero(page_head))
                                                VM_BUG_ON(1);
-
-                                       atomic_dec(&page->_mapcount);
-                                       goto skip_lock_tail;
+                                       if (put_page_testzero(page_head))
+                                               __put_compound_page(page_head);
+                                       return;
                                } else
+                                       /*
+                                        * __split_huge_page_refcount
+                                        * run before us, "page" was a
+                                        * THP tail. The split
+                                        * page_head has been freed
+                                        * and reallocated as slab or
+                                        * hugetlbfs page of smaller
+                                        * order (only possible if
+                                        * reallocated as slab on
+                                        * x86).
+                                        */
                                        goto skip_lock;
                        }
                        /*
@@ -132,8 +136,27 @@ static void put_compound_page(struct page *page)
                                /* __split_huge_page_refcount run before us */
                                compound_unlock_irqrestore(page_head, flags);
 skip_lock:
-                               if (put_page_testzero(page_head))
-                                       __put_single_page(page_head);
+                               if (put_page_testzero(page_head)) {
+                                       /*
+                                        * The head page may have been
+                                        * freed and reallocated as a
+                                        * compound page of smaller
+                                        * order and then freed again.
+                                        * All we know is that it
+                                        * cannot have become: a THP
+                                        * page, a compound page of
+                                        * higher order, a tail page.
+                                        * That is because we still
+                                        * hold the refcount of the
+                                        * split THP tail and
+                                        * page_head was the THP head
+                                        * before the split.
+                                        */
+                                       if (PageHead(page_head))
+                                               __put_compound_page(page_head);
+                                       else
+                                               __put_single_page(page_head);
+                               }
 out_put_single:
                                if (put_page_testzero(page))
                                        __put_single_page(page);
@@ -155,7 +178,6 @@ out_put_single:
                        VM_BUG_ON(atomic_read(&page->_count) != 0);
                        compound_unlock_irqrestore(page_head, flags);
 
-skip_lock_tail:
                        if (put_page_testzero(page_head)) {
                                if (PageHead(page_head))
                                        __put_compound_page(page_head);
@@ -198,51 +220,52 @@ bool __get_page_tail(struct page *page)
         * proper PT lock that already serializes against
         * split_huge_page().
         */
+       unsigned long flags;
        bool got = false;
-       struct page *page_head;
-
-       /*
-        * If this is a hugetlbfs page it cannot be split under us.  Simply
-        * increment refcount for the head page.
-        */
-       if (PageHuge(page)) {
-               page_head = compound_head(page);
-               atomic_inc(&page_head->_count);
-               got = true;
-       } else {
-               unsigned long flags;
+       struct page *page_head = compound_trans_head(page);
 
-               page_head = compound_trans_head(page);
-               if (likely(page != page_head &&
-                                       get_page_unless_zero(page_head))) {
-
-                       /* Ref to put_compound_page() comment. */
-                       if (PageSlab(page_head)) {
-                               if (likely(PageTail(page))) {
-                                       __get_page_tail_foll(page, false);
-                                       return true;
-                               } else {
-                                       put_page(page_head);
-                                       return false;
-                               }
-                       }
-
-                       /*
-                        * page_head wasn't a dangling pointer but it
-                        * may not be a head page anymore by the time
-                        * we obtain the lock. That is ok as long as it
-                        * can't be freed from under us.
-                        */
-                       flags = compound_lock_irqsave(page_head);
-                       /* here __split_huge_page_refcount won't run anymore */
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
+               /* Ref to put_compound_page() comment. */
+               if (PageSlab(page_head) || PageHeadHuge(page_head)) {
                        if (likely(PageTail(page))) {
+                               /*
+                                * This is a hugetlbfs page or a slab
+                                * page. __split_huge_page_refcount
+                                * cannot race here.
+                                */
+                               VM_BUG_ON(!PageHead(page_head));
                                __get_page_tail_foll(page, false);
-                               got = true;
-                       }
-                       compound_unlock_irqrestore(page_head, flags);
-                       if (unlikely(!got))
+                               return true;
+                       } else {
+                               /*
+                                * __split_huge_page_refcount run
+                                * before us, "page" was a THP
+                                * tail. The split page_head has been
+                                * freed and reallocated as slab or
+                                * hugetlbfs page of smaller order
+                                * (only possible if reallocated as
+                                * slab on x86).
+                                */
                                put_page(page_head);
+                               return false;
+                       }
+               }
+
+               /*
+                * page_head wasn't a dangling pointer but it
+                * may not be a head page anymore by the time
+                * we obtain the lock. That is ok as long as it
+                * can't be freed from under us.
+                */
+               flags = compound_lock_irqsave(page_head);
+               /* here __split_huge_page_refcount won't run anymore */
+               if (likely(PageTail(page))) {
+                       __get_page_tail_foll(page, false);
+                       got = true;
                }
+               compound_unlock_irqrestore(page_head, flags);
+               if (unlikely(!got))
+                       put_page(page_head);
        }
        return got;
 }
index 0715db64a5c3dd7ea0cd12f09c3fe62ee8385f3a..d334678c0bd8706d7c3efb6c0f35d0d62102c360 100644 (file)
@@ -224,7 +224,7 @@ source "net/hsr/Kconfig"
 
 config RPS
        boolean
-       depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+       depends on SMP && SYSFS
        default y
 
 config RFS_ACCEL
@@ -235,7 +235,7 @@ config RFS_ACCEL
 
 config XPS
        boolean
-       depends on SMP && USE_GENERIC_SMP_HELPERS
+       depends on SMP
        default y
 
 config NETPRIO_CGROUP
index 3dc0c6cf02a896e66071cd5f66b0a93f0dd3fc06..c4638e6f023843bedbceb91f1bef3c424ad3b666 100644 (file)
@@ -1425,7 +1425,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
        do {
                if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
                                              last_issued, &done,
-                                             &used) == DMA_SUCCESS) {
+                                             &used) == DMA_COMPLETE) {
                        /* Safe to free early-copied skbs now */
                        __skb_queue_purge(&sk->sk_async_wait_queue);
                        break;
@@ -1433,7 +1433,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
                        struct sk_buff *skb;
                        while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
                               (dma_async_is_complete(skb->dma_cookie, done,
-                                                     used) == DMA_SUCCESS)) {
+                                                     used) == DMA_COMPLETE)) {
                                __skb_dequeue(&sk->sk_async_wait_queue);
                                kfree_skb(skb);
                        }
index d0d14a04dce1eb2e4274e3111d9008b71da918aa..bf04b30a788a5425b28ba70c11a50b77bfc9c697 100644 (file)
@@ -471,15 +471,6 @@ struct rpc_filelist {
        umode_t mode;
 };
 
-static int rpc_delete_dentry(const struct dentry *dentry)
-{
-       return 1;
-}
-
-static const struct dentry_operations rpc_dentry_operations = {
-       .d_delete = rpc_delete_dentry,
-};
-
 static struct inode *
 rpc_get_inode(struct super_block *sb, umode_t mode)
 {
@@ -1266,7 +1257,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
        sb->s_magic = RPCAUTH_GSSMAGIC;
        sb->s_op = &s_ops;
-       sb->s_d_op = &rpc_dentry_operations;
+       sb->s_d_op = &simple_dentry_operations;
        sb->s_time_gran = 1;
 
        inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
index db0e5cd34c70866e097e0af91d745d1d96d579fe..91c4117637ae1fdf33d385ea5c3f9eba601e03a8 100644 (file)
@@ -1353,6 +1353,8 @@ static void render_out_of_line_list(FILE *out)
                        render_opcode(out, "ASN1_OP_END_SET_OF%s,\n", act);
                        render_opcode(out, "_jump_target(%u),\n", entry);
                        break;
+               default:
+                       break;
                }
                if (e->action)
                        render_opcode(out, "_action(ACT_%s),\n",
index 61090e0ff613aefeb5429c26d4efc7775ad97739..9c98100303774cd228ad83f3ff7bfcaeeb7af6fd 100755 (executable)
@@ -3289,6 +3289,7 @@ sub process {
                        }
                }
                if (!defined $suppress_whiletrailers{$linenr} &&
+                   defined($stat) && defined($cond) &&
                    $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
                        my ($s, $c) = ($stat, $cond);
 
index c26c81e925712fbc2ba38264f477bdd0a02f548d..a5918e01a4f71a6e97abc664682bd23f75bc544f 100644 (file)
@@ -16,7 +16,6 @@ obj-$(CONFIG_MMU)                     += min_addr.o
 # Object file lists
 obj-$(CONFIG_SECURITY)                 += security.o capability.o
 obj-$(CONFIG_SECURITYFS)               += inode.o
-# Must precede capability.o in order to stack properly.
 obj-$(CONFIG_SECURITY_SELINUX)         += selinux/built-in.o
 obj-$(CONFIG_SECURITY_SMACK)           += smack/built-in.o
 obj-$(CONFIG_AUDIT)                    += lsm_audit.o
index 031d2d9dd6950b7e6c1bf6f3b16f2a37a9ab9e22..89c78658031f10bfc0527030ed970aebe3ff5471 100644 (file)
@@ -111,7 +111,6 @@ static const char *const aa_audit_type[] = {
 static void audit_pre(struct audit_buffer *ab, void *ca)
 {
        struct common_audit_data *sa = ca;
-       struct task_struct *tsk = sa->aad->tsk ? sa->aad->tsk : current;
 
        if (aa_g_audit_header) {
                audit_log_format(ab, "apparmor=");
@@ -132,11 +131,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca)
 
        if (sa->aad->profile) {
                struct aa_profile *profile = sa->aad->profile;
-               pid_t pid;
-               rcu_read_lock();
-               pid = rcu_dereference(tsk->real_parent)->pid;
-               rcu_read_unlock();
-               audit_log_format(ab, " parent=%d", pid);
                if (profile->ns != root_ns) {
                        audit_log_format(ab, " namespace=");
                        audit_log_untrustedstring(ab, profile->ns->base.hname);
@@ -149,12 +143,6 @@ static void audit_pre(struct audit_buffer *ab, void *ca)
                audit_log_format(ab, " name=");
                audit_log_untrustedstring(ab, sa->aad->name);
        }
-
-       if (sa->aad->tsk) {
-               audit_log_format(ab, " pid=%d comm=", tsk->pid);
-               audit_log_untrustedstring(ab, tsk->comm);
-       }
-
 }
 
 /**
@@ -212,7 +200,7 @@ int aa_audit(int type, struct aa_profile *profile, gfp_t gfp,
 
        if (sa->aad->type == AUDIT_APPARMOR_KILL)
                (void)send_sig_info(SIGKILL, NULL,
-                                   sa->aad->tsk ?  sa->aad->tsk : current);
+                                   sa->u.tsk ?  sa->u.tsk : current);
 
        if (sa->aad->type == AUDIT_APPARMOR_ALLOWED)
                return complain_error(sa->aad->error);
index 84d1f5f538778b58f0b60c48d4a55ede44ff4c4f..1101c6f64bb7cb36602957ef2bcbc1538ee8aa63 100644 (file)
@@ -53,8 +53,7 @@ static void audit_cb(struct audit_buffer *ab, void *va)
 
 /**
  * audit_caps - audit a capability
- * @profile: profile confining task (NOT NULL)
- * @task: task capability test was performed against (NOT NULL)
+ * @profile: profile being tested for confinement (NOT NULL)
  * @cap: capability tested
  * @error: error code returned by test
  *
@@ -63,8 +62,7 @@ static void audit_cb(struct audit_buffer *ab, void *va)
  *
  * Returns: 0 or sa->error on success,  error code on failure
  */
-static int audit_caps(struct aa_profile *profile, struct task_struct *task,
-                     int cap, int error)
+static int audit_caps(struct aa_profile *profile, int cap, int error)
 {
        struct audit_cache *ent;
        int type = AUDIT_APPARMOR_AUTO;
@@ -73,7 +71,6 @@ static int audit_caps(struct aa_profile *profile, struct task_struct *task,
        sa.type = LSM_AUDIT_DATA_CAP;
        sa.aad = &aad;
        sa.u.cap = cap;
-       sa.aad->tsk = task;
        sa.aad->op = OP_CAPABLE;
        sa.aad->error = error;
 
@@ -124,8 +121,7 @@ static int profile_capable(struct aa_profile *profile, int cap)
 
 /**
  * aa_capable - test permission to use capability
- * @task: task doing capability test against (NOT NULL)
- * @profile: profile confining @task (NOT NULL)
+ * @profile: profile being tested against (NOT NULL)
  * @cap: capability to be tested
  * @audit: whether an audit record should be generated
  *
@@ -133,8 +129,7 @@ static int profile_capable(struct aa_profile *profile, int cap)
  *
  * Returns: 0 on success, or else an error code.
  */
-int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
-              int audit)
+int aa_capable(struct aa_profile *profile, int cap, int audit)
 {
        int error = profile_capable(profile, cap);
 
@@ -144,5 +139,5 @@ int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
                return error;
        }
 
-       return audit_caps(profile, task, cap, error);
+       return audit_caps(profile, cap, error);
 }
index 26c607c971f5656da192698602dbdb85afcfa8f1..452567d3a08e7ccfc5a7e3ae20ae95307554f85c 100644 (file)
@@ -50,23 +50,21 @@ void aa_free_domain_entries(struct aa_domain *domain)
 
 /**
  * may_change_ptraced_domain - check if can change profile on ptraced task
- * @task: task we want to change profile of   (NOT NULL)
  * @to_profile: profile to change to  (NOT NULL)
  *
- * Check if the task is ptraced and if so if the tracing task is allowed
+ * Check if current is ptraced and if so if the tracing task is allowed
  * to trace the new domain
  *
  * Returns: %0 or error if change not allowed
  */
-static int may_change_ptraced_domain(struct task_struct *task,
-                                    struct aa_profile *to_profile)
+static int may_change_ptraced_domain(struct aa_profile *to_profile)
 {
        struct task_struct *tracer;
        struct aa_profile *tracerp = NULL;
        int error = 0;
 
        rcu_read_lock();
-       tracer = ptrace_parent(task);
+       tracer = ptrace_parent(current);
        if (tracer)
                /* released below */
                tracerp = aa_get_task_profile(tracer);
@@ -75,7 +73,7 @@ static int may_change_ptraced_domain(struct task_struct *task,
        if (!tracer || unconfined(tracerp))
                goto out;
 
-       error = aa_may_ptrace(tracer, tracerp, to_profile, PTRACE_MODE_ATTACH);
+       error = aa_may_ptrace(tracerp, to_profile, PTRACE_MODE_ATTACH);
 
 out:
        rcu_read_unlock();
@@ -477,7 +475,7 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
        }
 
        if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
-               error = may_change_ptraced_domain(current, new_profile);
+               error = may_change_ptraced_domain(new_profile);
                if (error) {
                        aa_put_profile(new_profile);
                        goto audit;
@@ -690,7 +688,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
                        }
                }
 
-               error = may_change_ptraced_domain(current, hat);
+               error = may_change_ptraced_domain(hat);
                if (error) {
                        info = "ptraced";
                        error = -EPERM;
@@ -829,7 +827,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
        }
 
        /* check if tracing task is allowed to trace target domain */
-       error = may_change_ptraced_domain(current, target);
+       error = may_change_ptraced_domain(target);
        if (error) {
                info = "ptrace prevents transition";
                goto audit;
index 30e8d7687259aaef15defab3883e8e1e52d91c1f..ba3dfd17f23f2671b20512a63c06ba75928ed0fc 100644 (file)
@@ -109,7 +109,6 @@ struct apparmor_audit_data {
        void *profile;
        const char *name;
        const char *info;
-       struct task_struct *tsk;
        union {
                void *target;
                struct {
index 2e7c9d6a2f3bb3f7b7ab6f4aab3b63a46a56a10a..fc3fa381d8506c5dc249e93809b15292c87ffb84 100644 (file)
@@ -4,7 +4,7 @@
  * This file contains AppArmor capability mediation definitions.
  *
  * Copyright (C) 1998-2008 Novell/SUSE
- * Copyright 2009-2010 Canonical Ltd.
+ * Copyright 2009-2013 Canonical Ltd.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -38,8 +38,7 @@ struct aa_caps {
 
 extern struct aa_fs_entry aa_fs_entry_caps[];
 
-int aa_capable(struct task_struct *task, struct aa_profile *profile, int cap,
-              int audit);
+int aa_capable(struct aa_profile *profile, int cap, int audit);
 
 static inline void aa_free_cap_rules(struct aa_caps *caps)
 {
index aeda0fbc8b2fe2273278edf84903dfe0a1b13925..288ca76e2fb116a6ccb1c366220ef1d918f7b67c 100644 (file)
@@ -19,8 +19,8 @@
 
 struct aa_profile;
 
-int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer,
-                 struct aa_profile *tracee, unsigned int mode);
+int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee,
+                 unsigned int mode);
 
 int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee,
              unsigned int mode);
index c51d2266587e8b62d09394c0d8320a4b53001569..777ac1c47253ef4f88aa1bc97c0539a9e0b96e83 100644 (file)
@@ -54,15 +54,14 @@ static int aa_audit_ptrace(struct aa_profile *profile,
 
 /**
  * aa_may_ptrace - test if tracer task can trace the tracee
- * @tracer_task: task who will do the tracing  (NOT NULL)
  * @tracer: profile of the task doing the tracing  (NOT NULL)
  * @tracee: task to be traced
  * @mode: whether PTRACE_MODE_READ || PTRACE_MODE_ATTACH
  *
  * Returns: %0 else error code if permission denied or error
  */
-int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer,
-                 struct aa_profile *tracee, unsigned int mode)
+int aa_may_ptrace(struct aa_profile *tracer, struct aa_profile *tracee,
+                 unsigned int mode)
 {
        /* TODO: currently only based on capability, not extended ptrace
         *       rules,
@@ -72,7 +71,7 @@ int aa_may_ptrace(struct task_struct *tracer_task, struct aa_profile *tracer,
        if (unconfined(tracer) || tracer == tracee)
                return 0;
        /* log this capability request */
-       return aa_capable(tracer_task, tracer, CAP_SYS_PTRACE, 1);
+       return aa_capable(tracer, CAP_SYS_PTRACE, 1);
 }
 
 /**
@@ -101,7 +100,7 @@ int aa_ptrace(struct task_struct *tracer, struct task_struct *tracee,
        if (!unconfined(tracer_p)) {
                struct aa_profile *tracee_p = aa_get_task_profile(tracee);
 
-               error = aa_may_ptrace(tracer, tracer_p, tracee_p, mode);
+               error = aa_may_ptrace(tracer_p, tracee_p, mode);
                error = aa_audit_ptrace(tracer_p, tracee_p, error);
 
                aa_put_profile(tracee_p);
index fb99e18123b41b4f049fd98078e88aafccb7729b..4257b7e2796bf16e41db9ddca0e1f7c8e0d08a02 100644 (file)
@@ -145,7 +145,7 @@ static int apparmor_capable(const struct cred *cred, struct user_namespace *ns,
        if (!error) {
                profile = aa_cred_profile(cred);
                if (!unconfined(profile))
-                       error = aa_capable(current, profile, cap, audit);
+                       error = aa_capable(profile, cap, audit);
        }
        return error;
 }
index dbeb9bc27b24a14b7f546a44843bba2757db77cb..8b4f24ae43381de05af67271edd9a8ddd57c651f 100644 (file)
@@ -777,9 +777,15 @@ static int cap_xfrm_policy_delete_security(struct xfrm_sec_ctx *ctx)
        return 0;
 }
 
-static int cap_xfrm_state_alloc_security(struct xfrm_state *x,
-                                        struct xfrm_user_sec_ctx *sec_ctx,
-                                        u32 secid)
+static int cap_xfrm_state_alloc(struct xfrm_state *x,
+                               struct xfrm_user_sec_ctx *sec_ctx)
+{
+       return 0;
+}
+
+static int cap_xfrm_state_alloc_acquire(struct xfrm_state *x,
+                                       struct xfrm_sec_ctx *polsec,
+                                       u32 secid)
 {
        return 0;
 }
@@ -1101,7 +1107,8 @@ void __init security_fixup_ops(struct security_operations *ops)
        set_to_cap_if_null(ops, xfrm_policy_clone_security);
        set_to_cap_if_null(ops, xfrm_policy_free_security);
        set_to_cap_if_null(ops, xfrm_policy_delete_security);
-       set_to_cap_if_null(ops, xfrm_state_alloc_security);
+       set_to_cap_if_null(ops, xfrm_state_alloc);
+       set_to_cap_if_null(ops, xfrm_state_alloc_acquire);
        set_to_cap_if_null(ops, xfrm_state_free_security);
        set_to_cap_if_null(ops, xfrm_state_delete_security);
        set_to_cap_if_null(ops, xfrm_policy_lookup);
index 0b759e17a1311abc3a2fb1f6fbf7e554b8f71ce5..77ca965ab684e67e2809dc16f024728d8df41c70 100644 (file)
@@ -13,7 +13,9 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/err.h>
+#include <linux/sched.h>
 #include <linux/rbtree.h>
+#include <linux/cred.h>
 #include <linux/key-type.h>
 #include <linux/digsig.h>
 
 
 static struct key *keyring[INTEGRITY_KEYRING_MAX];
 
+#ifdef CONFIG_IMA_TRUSTED_KEYRING
+static const char *keyring_name[INTEGRITY_KEYRING_MAX] = {
+       ".evm",
+       ".module",
+       ".ima",
+};
+#else
 static const char *keyring_name[INTEGRITY_KEYRING_MAX] = {
        "_evm",
        "_module",
        "_ima",
 };
+#endif
 
 int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
-                                       const char *digest, int digestlen)
+                           const char *digest, int digestlen)
 {
        if (id >= INTEGRITY_KEYRING_MAX)
                return -EINVAL;
 
        if (!keyring[id]) {
                keyring[id] =
-                       request_key(&key_type_keyring, keyring_name[id], NULL);
+                   request_key(&key_type_keyring, keyring_name[id], NULL);
                if (IS_ERR(keyring[id])) {
                        int err = PTR_ERR(keyring[id]);
                        pr_err("no %s keyring: %d\n", keyring_name[id], err);
@@ -44,9 +54,10 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
                }
        }
 
-       switch (sig[0]) {
+       switch (sig[1]) {
        case 1:
-               return digsig_verify(keyring[id], sig, siglen,
+               /* v1 API expects the signature without the xattr type byte */
+               return digsig_verify(keyring[id], sig + 1, siglen - 1,
                                     digest, digestlen);
        case 2:
                return asymmetric_verify(keyring[id], sig, siglen,
@@ -55,3 +66,21 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
 
        return -EOPNOTSUPP;
 }
+
+int integrity_init_keyring(const unsigned int id)
+{
+       const struct cred *cred = current_cred();
+       const struct user_struct *user = cred->user;
+
+       keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0),
+                                   KGIDT_INIT(0), cred,
+                                   ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                    KEY_USR_VIEW | KEY_USR_READ),
+                                   KEY_ALLOC_NOT_IN_QUOTA, user->uid_keyring);
+       if (!IS_ERR(keyring[id]))
+               set_bit(KEY_FLAG_TRUSTED_ONLY, &keyring[id]->flags);
+       else
+               pr_info("Can't allocate %s keyring (%ld)\n",
+                       keyring_name[id], PTR_ERR(keyring[id]));
+       return 0;
+}
index b4754667659da1d6d8896a07f689c4b3f53fc2c8..9eae4809006be6f364ed9e5fe31ccb897381b856 100644 (file)
 
 #include "integrity.h"
 
-/*
- * signature format v2 - for using with asymmetric keys
- */
-struct signature_v2_hdr {
-       uint8_t version;        /* signature format version */
-       uint8_t hash_algo;      /* Digest algorithm [enum pkey_hash_algo] */
-       uint32_t keyid;         /* IMA key identifier - not X509/PGP specific*/
-       uint16_t sig_size;      /* signature size */
-       uint8_t sig[0];         /* signature payload */
-} __packed;
-
 /*
  * Request an asymmetric key.
  */
index af9b6852f4e1bf571b55a2010fd6ab0488119cda..336b3ddfe63f5304374f860419cd3f5251bde5b8 100644 (file)
@@ -123,7 +123,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
                goto out;
        }
 
-       xattr_len = rc - 1;
+       xattr_len = rc;
 
        /* check value type */
        switch (xattr_data->type) {
@@ -143,7 +143,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
                if (rc)
                        break;
                rc = integrity_digsig_verify(INTEGRITY_KEYRING_EVM,
-                                       xattr_data->digest, xattr_len,
+                                       (const char *)xattr_data, xattr_len,
                                        calc.digest, sizeof(calc.digest));
                if (!rc) {
                        /* we probably want to replace rsa with hmac here */
index b1753e98bf9aff919ab2b832de59e0858ae37abb..46408b9e62e876e4f711a5231ab0584aa5c7c1fd 100644 (file)
@@ -11,8 +11,9 @@
 
 #include <linux/module.h>
 #include <linux/xattr.h>
+#include <linux/evm.h>
 
-int posix_xattr_acl(char *xattr)
+int posix_xattr_acl(const char *xattr)
 {
        int xattr_len = strlen(xattr);
 
index 74522dbd10a6e093fe293786f83a10ca8d7361bc..c49d3f14cbec96b49e2b8bedf15b4be570c5a7ef 100644 (file)
@@ -70,6 +70,8 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode)
 
 static void iint_free(struct integrity_iint_cache *iint)
 {
+       kfree(iint->ima_hash);
+       iint->ima_hash = NULL;
        iint->version = 0;
        iint->flags = 0UL;
        iint->ima_file_status = INTEGRITY_UNKNOWN;
index 39196abaff0d69d7d600ecd53847ba62b8cebed1..dad8d4ca2437fd608b73218993b23c3275c65b8d 100644 (file)
@@ -9,6 +9,7 @@ config IMA
        select CRYPTO_HMAC
        select CRYPTO_MD5
        select CRYPTO_SHA1
+       select CRYPTO_HASH_INFO
        select TCG_TPM if HAS_IOMEM && !UML
        select TCG_TIS if TCG_TPM && X86
        select TCG_IBMVTPM if TCG_TPM && PPC64
@@ -45,6 +46,69 @@ config IMA_LSM_RULES
        help
          Disabling this option will disregard LSM based policy rules.
 
+choice
+       prompt "Default template"
+       default IMA_NG_TEMPLATE
+       depends on IMA
+       help
+         Select the default IMA measurement template.
+
+         The original 'ima' measurement list template contains a
+         hash, defined as 20 bytes, and a null terminated pathname,
+         limited to 255 characters.  The 'ima-ng' measurement list
+         template permits both larger hash digests and longer
+         pathnames.
+
+       config IMA_TEMPLATE
+               bool "ima"
+       config IMA_NG_TEMPLATE
+               bool "ima-ng (default)"
+       config IMA_SIG_TEMPLATE
+               bool "ima-sig"
+endchoice
+
+config IMA_DEFAULT_TEMPLATE
+       string
+       depends on IMA
+       default "ima" if IMA_TEMPLATE
+       default "ima-ng" if IMA_NG_TEMPLATE
+       default "ima-sig" if IMA_SIG_TEMPLATE
+
+choice
+       prompt "Default integrity hash algorithm"
+       default IMA_DEFAULT_HASH_SHA1
+       depends on IMA
+       help
+          Select the default hash algorithm used for the measurement
+          list, integrity appraisal and audit log.  The compiled default
+          hash algorithm can be overwritten using the kernel command
+          line 'ima_hash=' option.
+
+       config IMA_DEFAULT_HASH_SHA1
+               bool "SHA1 (default)"
+               depends on CRYPTO_SHA1
+
+       config IMA_DEFAULT_HASH_SHA256
+               bool "SHA256"
+               depends on CRYPTO_SHA256 && !IMA_TEMPLATE
+
+       config IMA_DEFAULT_HASH_SHA512
+               bool "SHA512"
+               depends on CRYPTO_SHA512 && !IMA_TEMPLATE
+
+       config IMA_DEFAULT_HASH_WP512
+               bool "WP512"
+               depends on CRYPTO_WP512 && !IMA_TEMPLATE
+endchoice
+
+config IMA_DEFAULT_HASH
+       string
+       depends on IMA
+       default "sha1" if IMA_DEFAULT_HASH_SHA1
+       default "sha256" if IMA_DEFAULT_HASH_SHA256
+       default "sha512" if IMA_DEFAULT_HASH_SHA512
+       default "wp512" if IMA_DEFAULT_HASH_WP512
+
 config IMA_APPRAISE
        bool "Appraise integrity measurements"
        depends on IMA
@@ -59,3 +123,11 @@ config IMA_APPRAISE
          For more information on integrity appraisal refer to:
          <http://linux-ima.sourceforge.net>
          If unsure, say N.
+
+config IMA_TRUSTED_KEYRING
+       bool "Require all keys on the .ima keyring be signed"
+       depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING
+       default y
+       help
+          This option requires that all keys added to the .ima
+          keyring be signed by a key on the system trusted keyring.
index 56dfee7cbf61c6605adf103dbf91393b33fe9256..d79263d2fdbfd0098666f541db32552c784688ce 100644 (file)
@@ -6,5 +6,5 @@
 obj-$(CONFIG_IMA) += ima.o
 
 ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \
-        ima_policy.o
+        ima_policy.o ima_template.o ima_template_lib.o
 ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o
index b3dd616560f72054e13ddeaedaa39017585c2e5c..bf03c6a16cc83ace2f47a3ea7571ef79a63ed1d9 100644 (file)
@@ -36,23 +36,48 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 };
 #define IMA_HASH_BITS 9
 #define IMA_MEASURE_HTABLE_SIZE (1 << IMA_HASH_BITS)
 
+#define IMA_TEMPLATE_FIELD_ID_MAX_LEN  16
+#define IMA_TEMPLATE_NUM_FIELDS_MAX    15
+
+#define IMA_TEMPLATE_IMA_NAME "ima"
+#define IMA_TEMPLATE_IMA_FMT "d|n"
+
 /* set during initialization */
 extern int ima_initialized;
 extern int ima_used_chip;
-extern char *ima_hash;
+extern int ima_hash_algo;
 extern int ima_appraise;
 
-/* IMA inode template definition */
-struct ima_template_data {
-       u8 digest[IMA_DIGEST_SIZE];     /* sha1/md5 measurement hash */
-       char file_name[IMA_EVENT_NAME_LEN_MAX + 1];     /* name + \0 */
+/* IMA template field data definition */
+struct ima_field_data {
+       u8 *data;
+       u32 len;
+};
+
+/* IMA template field definition */
+struct ima_template_field {
+       const char field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN];
+       int (*field_init) (struct integrity_iint_cache *iint, struct file *file,
+                          const unsigned char *filename,
+                          struct evm_ima_xattr_data *xattr_value,
+                          int xattr_len, struct ima_field_data *field_data);
+       void (*field_show) (struct seq_file *m, enum ima_show_type show,
+                           struct ima_field_data *field_data);
+};
+
+/* IMA template descriptor definition */
+struct ima_template_desc {
+       char *name;
+       char *fmt;
+       int num_fields;
+       struct ima_template_field **fields;
 };
 
 struct ima_template_entry {
-       u8 digest[IMA_DIGEST_SIZE];     /* sha1 or md5 measurement hash */
-       const char *template_name;
-       int template_len;
-       struct ima_template_data template;
+       u8 digest[TPM_DIGEST_SIZE];     /* sha1 or md5 measurement hash */
+       struct ima_template_desc *template_desc; /* template descriptor */
+       u32 template_data_len;
+       struct ima_field_data template_data[0]; /* template related data */
 };
 
 struct ima_queue_entry {
@@ -69,13 +94,21 @@ int ima_fs_init(void);
 void ima_fs_cleanup(void);
 int ima_inode_alloc(struct inode *inode);
 int ima_add_template_entry(struct ima_template_entry *entry, int violation,
-                          const char *op, struct inode *inode);
-int ima_calc_file_hash(struct file *file, char *digest);
-int ima_calc_buffer_hash(const void *data, int len, char *digest);
-int ima_calc_boot_aggregate(char *digest);
-void ima_add_violation(struct inode *inode, const unsigned char *filename,
+                          const char *op, struct inode *inode,
+                          const unsigned char *filename);
+int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash);
+int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields,
+                             struct ima_digest_data *hash);
+int __init ima_calc_boot_aggregate(struct ima_digest_data *hash);
+void ima_add_violation(struct file *file, const unsigned char *filename,
                       const char *op, const char *cause);
 int ima_init_crypto(void);
+void ima_putc(struct seq_file *m, void *data, int datalen);
+void ima_print_digest(struct seq_file *m, u8 *digest, int size);
+struct ima_template_desc *ima_template_desc_current(void);
+int ima_init_template(void);
+
+int ima_init_template(void);
 
 /*
  * used to protect h_table and sha_table
@@ -98,14 +131,21 @@ static inline unsigned long ima_hash_key(u8 *digest)
 int ima_get_action(struct inode *inode, int mask, int function);
 int ima_must_measure(struct inode *inode, int mask, int function);
 int ima_collect_measurement(struct integrity_iint_cache *iint,
-                           struct file *file);
+                           struct file *file,
+                           struct evm_ima_xattr_data **xattr_value,
+                           int *xattr_len);
 void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
-                          const unsigned char *filename);
+                          const unsigned char *filename,
+                          struct evm_ima_xattr_data *xattr_value,
+                          int xattr_len);
 void ima_audit_measurement(struct integrity_iint_cache *iint,
                           const unsigned char *filename);
+int ima_alloc_init_template(struct integrity_iint_cache *iint,
+                           struct file *file, const unsigned char *filename,
+                           struct evm_ima_xattr_data *xattr_value,
+                           int xattr_len, struct ima_template_entry **entry);
 int ima_store_template(struct ima_template_entry *entry, int violation,
-                      struct inode *inode);
-void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show);
+                      struct inode *inode, const unsigned char *filename);
 const char *ima_d_path(struct path *path, char **pathbuf);
 
 /* rbtree tree calls to lookup, insert, delete
@@ -131,17 +171,25 @@ void ima_delete_rules(void);
 
 #ifdef CONFIG_IMA_APPRAISE
 int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
-                            struct file *file, const unsigned char *filename);
+                            struct file *file, const unsigned char *filename,
+                            struct evm_ima_xattr_data *xattr_value,
+                            int xattr_len);
 int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
 void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
                                           int func);
+void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                      struct ima_digest_data *hash);
+int ima_read_xattr(struct dentry *dentry,
+                  struct evm_ima_xattr_data **xattr_value);
 
 #else
 static inline int ima_appraise_measurement(int func,
                                           struct integrity_iint_cache *iint,
                                           struct file *file,
-                                          const unsigned char *filename)
+                                          const unsigned char *filename,
+                                          struct evm_ima_xattr_data *xattr_value,
+                                          int xattr_len)
 {
        return INTEGRITY_UNKNOWN;
 }
@@ -162,6 +210,19 @@ static inline enum integrity_status ima_get_cache_status(struct integrity_iint_c
 {
        return INTEGRITY_UNKNOWN;
 }
+
+static inline void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value,
+                                    int xattr_len,
+                                    struct ima_digest_data *hash)
+{ /* stub for the #else (appraisal disabled) branch: hash->algo is left untouched */
+}
+
+static inline int ima_read_xattr(struct dentry *dentry,
+                                struct evm_ima_xattr_data **xattr_value)
+{ /* stub for the #else (appraisal disabled) branch: nothing is read */
+       return 0; /* reported xattr length is 0; *xattr_value is not written */
+}
+
 #endif
 
 /* LSM based policy rules require audit */
index 1c03e8f1e0e125cc948854e033d689a2aab22303..0e7540863fc299687877ae4591961b13b60fcaa9 100644 (file)
 #include <linux/fs.h>
 #include <linux/xattr.h>
 #include <linux/evm.h>
+#include <crypto/hash_info.h>
 #include "ima.h"
 
-static const char *IMA_TEMPLATE_NAME = "ima";
+/*
+ * ima_alloc_init_template - create and initialize a new template entry
+ *
+ * Allocates a zeroed entry with room for one ima_field_data slot per field
+ * of the current template descriptor, then asks each field's field_init()
+ * hook to fill in its slot. template_data_len accumulates, per field, the
+ * size of the u32 length prefix plus the field payload length.
+ *
+ * On success *entry points to the new entry and 0 is returned. On failure
+ * *entry is set to NULL and either -ENOMEM or the non-zero value returned
+ * by the failing field_init() hook is passed back.
+ */
+int ima_alloc_init_template(struct integrity_iint_cache *iint,
+                           struct file *file, const unsigned char *filename,
+                           struct evm_ima_xattr_data *xattr_value,
+                           int xattr_len, struct ima_template_entry **entry)
+{
+       struct ima_template_desc *template_desc = ima_template_desc_current();
+       int i, result = 0;
+
+       *entry = kzalloc(sizeof(**entry) + template_desc->num_fields *
+                        sizeof(struct ima_field_data), GFP_NOFS);
+       if (!*entry)
+               return -ENOMEM;
+
+       for (i = 0; i < template_desc->num_fields; i++) {
+               struct ima_template_field *field = template_desc->fields[i];
+               u32 len;
+
+               result = field->field_init(iint, file, filename,
+                                          xattr_value, xattr_len,
+                                          &((*entry)->template_data[i]));
+               if (result != 0)
+                       goto out;
+
+               len = (*entry)->template_data[i].len;
+               (*entry)->template_data_len += sizeof(len);
+               (*entry)->template_data_len += len;
+       }
+       (*entry)->template_desc = template_desc;
+       return 0;
+out:
+       /*
+        * Release the payloads of the fields that were already initialized
+        * so they are not leaked along with the entry. kzalloc() left the
+        * untouched slots with a NULL data pointer, which kfree() accepts.
+        * NOTE(review): assumes field_init() hands back kmalloc'ed buffers
+        * owned by the entry - confirm against the template field helpers.
+        */
+       for (i = 0; i < template_desc->num_fields; i++)
+               kfree((*entry)->template_data[i].data);
+       kfree(*entry);
+       *entry = NULL;
+       return result;
+}
 
 /*
  * ima_store_template - store ima template measurements
@@ -39,28 +76,34 @@ static const char *IMA_TEMPLATE_NAME = "ima";
  * Returns 0 on success, error code otherwise
  */
 int ima_store_template(struct ima_template_entry *entry,
-                      int violation, struct inode *inode)
+                      int violation, struct inode *inode,
+                      const unsigned char *filename)
 {
        const char *op = "add_template_measure";
        const char *audit_cause = "hashing_error";
+       char *template_name = entry->template_desc->name;
        int result;
-
-       memset(entry->digest, 0, sizeof(entry->digest));
-       entry->template_name = IMA_TEMPLATE_NAME;
-       entry->template_len = sizeof(entry->template);
+       struct {
+               struct ima_digest_data hdr;
+               char digest[TPM_DIGEST_SIZE];
+       } hash; /* digest header plus TPM_DIGEST_SIZE bytes; presumably backing storage for hdr.digest */
 
        if (!violation) { /* violations skip hashing; entry->digest is not written here */
+               int num_fields = entry->template_desc->num_fields;
+
+               /* this function uses default algo */
+               hash.hdr.algo = HASH_ALGO_SHA1;
+               result = ima_calc_field_array_hash(&entry->template_data[0],
+                                                  num_fields, &hash.hdr);
                if (result < 0) {
                        integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
-                                           entry->template_name, op,
+                                           template_name, op,
                                            audit_cause, result, 0);
                        return result;
                }
+               memcpy(entry->digest, hash.hdr.digest, hash.hdr.length); /* copy the computed template digest into the entry */
        }
-       result = ima_add_template_entry(entry, violation, op, inode);
+       result = ima_add_template_entry(entry, violation, op, inode, filename);
        return result;
 }
 
@@ -71,24 +114,24 @@ int ima_store_template(struct ima_template_entry *entry,
  * By extending the PCR with 0xFF's instead of with zeroes, the PCR
  * value is invalidated.
  */
-void ima_add_violation(struct inode *inode, const unsigned char *filename,
+void ima_add_violation(struct file *file, const unsigned char *filename,
                       const char *op, const char *cause)
 {
        struct ima_template_entry *entry;
+       struct inode *inode = file->f_dentry->d_inode;
        int violation = 1;
        int result;
 
        /* can overflow, only indicator */
        atomic_long_inc(&ima_htable.violations);
 
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry) {
+       result = ima_alloc_init_template(NULL, file, filename,
+                                        NULL, 0, &entry);
+       if (result < 0) {
                result = -ENOMEM;
                goto err_out;
        }
-       memset(&entry->template, 0, sizeof(entry->template));
-       strncpy(entry->template.file_name, filename, IMA_EVENT_NAME_LEN_MAX);
-       result = ima_store_template(entry, violation, inode);
+       result = ima_store_template(entry, violation, inode, filename);
        if (result < 0)
                kfree(entry);
 err_out:
@@ -138,20 +181,42 @@ int ima_must_measure(struct inode *inode, int mask, int function)
  * Return 0 on success, error code otherwise
  */
 int ima_collect_measurement(struct integrity_iint_cache *iint,
-                           struct file *file)
+                           struct file *file,
+                           struct evm_ima_xattr_data **xattr_value,
+                           int *xattr_len)
 {
        struct inode *inode = file_inode(file);
        const char *filename = file->f_dentry->d_name.name;
        int result = 0;
+       struct {
+               struct ima_digest_data hdr;
+               char digest[IMA_MAX_DIGEST_SIZE];
+       } hash;
+
+       if (xattr_value)
+               *xattr_len = ima_read_xattr(file->f_dentry, xattr_value);
 
        if (!(iint->flags & IMA_COLLECTED)) {
                u64 i_version = file_inode(file)->i_version;
 
-               iint->ima_xattr.type = IMA_XATTR_DIGEST;
-               result = ima_calc_file_hash(file, iint->ima_xattr.digest);
+               /* use default hash algorithm */
+               hash.hdr.algo = ima_hash_algo;
+
+               if (xattr_value)
+                       ima_get_hash_algo(*xattr_value, *xattr_len, &hash.hdr);
+
+               result = ima_calc_file_hash(file, &hash.hdr);
                if (!result) {
-                       iint->version = i_version;
-                       iint->flags |= IMA_COLLECTED;
+                       int length = sizeof(hash.hdr) + hash.hdr.length;
+                       void *tmpbuf = krealloc(iint->ima_hash, length,
+                                               GFP_NOFS);
+                       if (tmpbuf) {
+                               iint->ima_hash = tmpbuf;
+                               memcpy(iint->ima_hash, &hash, length);
+                               iint->version = i_version;
+                               iint->flags |= IMA_COLLECTED;
+                       } else
+                               result = -ENOMEM;
                }
        }
        if (result)
@@ -177,7 +242,9 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
  * Must be called with iint->mutex held.
  */
 void ima_store_measurement(struct integrity_iint_cache *iint,
-                          struct file *file, const unsigned char *filename)
+                          struct file *file, const unsigned char *filename,
+                          struct evm_ima_xattr_data *xattr_value,
+                          int xattr_len)
 {
        const char *op = "add_template_measure";
        const char *audit_cause = "ENOMEM";
@@ -189,19 +256,15 @@ void ima_store_measurement(struct integrity_iint_cache *iint,
        if (iint->flags & IMA_MEASURED)
                return;
 
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry) {
+       result = ima_alloc_init_template(iint, file, filename,
+                                        xattr_value, xattr_len, &entry);
+       if (result < 0) {
                integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename,
                                    op, audit_cause, result, 0);
                return;
        }
-       memset(&entry->template, 0, sizeof(entry->template));
-       memcpy(entry->template.digest, iint->ima_xattr.digest, IMA_DIGEST_SIZE);
-       strcpy(entry->template.file_name,
-              (strlen(filename) > IMA_EVENT_NAME_LEN_MAX) ?
-              file->f_dentry->d_name.name : filename);
 
-       result = ima_store_template(entry, violation, inode);
+       result = ima_store_template(entry, violation, inode, filename);
        if (!result || result == -EEXIST)
                iint->flags |= IMA_MEASURED;
        if (result < 0)
@@ -212,14 +275,16 @@ void ima_audit_measurement(struct integrity_iint_cache *iint,
                           const unsigned char *filename)
 {
        struct audit_buffer *ab;
-       char hash[(IMA_DIGEST_SIZE * 2) + 1];
+       char hash[(iint->ima_hash->length * 2) + 1];
+       const char *algo_name = hash_algo_name[iint->ima_hash->algo];
+       char algo_hash[sizeof(hash) + strlen(algo_name) + 2];
        int i;
 
        if (iint->flags & IMA_AUDITED)
                return;
 
-       for (i = 0; i < IMA_DIGEST_SIZE; i++)
-               hex_byte_pack(hash + (i * 2), iint->ima_xattr.digest[i]);
+       for (i = 0; i < iint->ima_hash->length; i++)
+               hex_byte_pack(hash + (i * 2), iint->ima_hash->digest[i]);
        hash[i * 2] = '\0';
 
        ab = audit_log_start(current->audit_context, GFP_KERNEL,
@@ -230,7 +295,8 @@ void ima_audit_measurement(struct integrity_iint_cache *iint,
        audit_log_format(ab, "file=");
        audit_log_untrustedstring(ab, filename);
        audit_log_format(ab, " hash=");
-       audit_log_untrustedstring(ab, hash);
+       snprintf(algo_hash, sizeof(algo_hash), "%s:%s", algo_name, hash);
+       audit_log_untrustedstring(ab, algo_hash);
 
        audit_log_task_info(ab, current);
        audit_log_end(ab);
index 2d4becab8918053a6d3520dc32527a33d92a3717..46353ee517f6f321a738b067584d7e62dd8872fe 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/magic.h>
 #include <linux/ima.h>
 #include <linux/evm.h>
+#include <crypto/hash_info.h>
 
 #include "ima.h"
 
@@ -43,19 +44,31 @@ int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func)
 }
 
 static int ima_fix_xattr(struct dentry *dentry,
-                         struct integrity_iint_cache *iint)
+                        struct integrity_iint_cache *iint)
 {
-       iint->ima_xattr.type = IMA_XATTR_DIGEST;
-       return __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA,
-                                    (u8 *)&iint->ima_xattr,
-                                     sizeof(iint->ima_xattr), 0);
+       int rc, offset;
+       u8 algo = iint->ima_hash->algo;
+
+       if (algo <= HASH_ALGO_SHA1) { /* algorithm ids up to SHA1 are stored as IMA_XATTR_DIGEST */
+               offset = 1; /* written xattr starts at xattr.data[1] */
+               iint->ima_hash->xattr.sha1.type = IMA_XATTR_DIGEST;
+       } else {
+               offset = 0; /* "ng" form is written starting at xattr.data[0] */
+               iint->ima_hash->xattr.ng.type = IMA_XATTR_DIGEST_NG;
+               iint->ima_hash->xattr.ng.algo = algo;
+       }
+       rc = __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA,
+                                  &iint->ima_hash->xattr.data[offset],
+                                  (sizeof(iint->ima_hash->xattr) - offset) +
+                                  iint->ima_hash->length, 0); /* size = xattr header minus skipped bytes, plus the digest */
+       return rc;
 }
 
 /* Return specific func appraised cached result */
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
                                           int func)
 {
-       switch(func) {
+       switch (func) {
        case MMAP_CHECK:
                return iint->ima_mmap_status;
        case BPRM_CHECK:
@@ -71,7 +84,7 @@ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
 static void ima_set_cache_status(struct integrity_iint_cache *iint,
                                 int func, enum integrity_status status)
 {
-       switch(func) {
+       switch (func) {
        case MMAP_CHECK:
                iint->ima_mmap_status = status;
                break;
@@ -90,7 +103,7 @@ static void ima_set_cache_status(struct integrity_iint_cache *iint,
 
 static void ima_cache_flags(struct integrity_iint_cache *iint, int func)
 {
-       switch(func) {
+       switch (func) {
        case MMAP_CHECK:
                iint->flags |= (IMA_MMAP_APPRAISED | IMA_APPRAISED);
                break;
@@ -107,6 +120,50 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func)
        }
 }
 
+void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                      struct ima_digest_data *hash) /* derive hash->algo from the xattr when it can be determined */
+{
+       struct signature_v2_hdr *sig;
+
+       if (!xattr_value || xattr_len < 2) /* no usable xattr: leave hash->algo untouched */
+               return;
+
+       switch (xattr_value->type) {
+       case EVM_IMA_XATTR_DIGSIG:
+               sig = (typeof(sig))xattr_value;
+               if (sig->version != 2 || xattr_len <= sizeof(*sig)) /* only v2 headers followed by data are used */
+                       return;
+               hash->algo = sig->hash_algo;
+               break;
+       case IMA_XATTR_DIGEST_NG:
+               hash->algo = xattr_value->digest[0]; /* first digest byte carries the algorithm id */
+               break;
+       case IMA_XATTR_DIGEST:
+               /* this is for backward compatibility */
+               if (xattr_len == 21) { /* presumably 1 type byte + 20 digest bytes */
+                       unsigned int zero = 0;
+                       if (!memcmp(&xattr_value->digest[16], &zero, 4)) /* digest bytes 16..19 all zero: treated as md5 */
+                               hash->algo = HASH_ALGO_MD5;
+                       else
+                               hash->algo = HASH_ALGO_SHA1;
+               } else if (xattr_len == 17) /* presumably 1 type byte + 16 digest bytes */
+                       hash->algo = HASH_ALGO_MD5;
+               break;
+       } /* any other type or length leaves hash->algo unchanged */
+}
+
+int ima_read_xattr(struct dentry *dentry,
+                  struct evm_ima_xattr_data **xattr_value) /* returns vfs_getxattr_alloc()'s result, or 0 without a getxattr op */
+{
+       struct inode *inode = dentry->d_inode;
+
+       if (!inode->i_op->getxattr) /* inode has no getxattr operation: report no xattr */
+               return 0;
+
+       return vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)xattr_value,
+                                 0, GFP_NOFS); /* NOTE(review): caller presumably owns and frees *xattr_value - confirm */
+}
+
 /*
  * ima_appraise_measurement - appraise file measurement
  *
@@ -116,23 +173,22 @@ static void ima_cache_flags(struct integrity_iint_cache *iint, int func)
  * Return 0 on success, error code otherwise
  */
 int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
-                            struct file *file, const unsigned char *filename)
+                            struct file *file, const unsigned char *filename,
+                            struct evm_ima_xattr_data *xattr_value,
+                            int xattr_len)
 {
        struct dentry *dentry = file->f_dentry;
        struct inode *inode = dentry->d_inode;
-       struct evm_ima_xattr_data *xattr_value = NULL;
        enum integrity_status status = INTEGRITY_UNKNOWN;
        const char *op = "appraise_data";
        char *cause = "unknown";
-       int rc;
+       int rc = xattr_len, hash_start = 0;
 
        if (!ima_appraise)
                return 0;
        if (!inode->i_op->getxattr)
                return INTEGRITY_UNKNOWN;
 
-       rc = vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)&xattr_value,
-                               0, GFP_NOFS);
        if (rc <= 0) {
                if (rc && rc != -ENODATA)
                        goto out;
@@ -153,14 +209,25 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
                goto out;
        }
        switch (xattr_value->type) {
+       case IMA_XATTR_DIGEST_NG:
+               /* first byte contains algorithm id */
+               hash_start = 1;
        case IMA_XATTR_DIGEST:
                if (iint->flags & IMA_DIGSIG_REQUIRED) {
                        cause = "IMA signature required";
                        status = INTEGRITY_FAIL;
                        break;
                }
-               rc = memcmp(xattr_value->digest, iint->ima_xattr.digest,
-                           IMA_DIGEST_SIZE);
+               if (xattr_len - sizeof(xattr_value->type) - hash_start >=
+                               iint->ima_hash->length)
+                       /* xattr length may be longer. md5 hash in previous
+                          version occupied 20 bytes in xattr, instead of 16
+                        */
+                       rc = memcmp(&xattr_value->digest[hash_start],
+                                   iint->ima_hash->digest,
+                                   iint->ima_hash->length);
+               else
+                       rc = -EINVAL;
                if (rc) {
                        cause = "invalid-hash";
                        status = INTEGRITY_FAIL;
@@ -171,9 +238,9 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
        case EVM_IMA_XATTR_DIGSIG:
                iint->flags |= IMA_DIGSIG;
                rc = integrity_digsig_verify(INTEGRITY_KEYRING_IMA,
-                                            xattr_value->digest, rc - 1,
-                                            iint->ima_xattr.digest,
-                                            IMA_DIGEST_SIZE);
+                                            (const char *)xattr_value, rc,
+                                            iint->ima_hash->digest,
+                                            iint->ima_hash->length);
                if (rc == -EOPNOTSUPP) {
                        status = INTEGRITY_UNKNOWN;
                } else if (rc) {
@@ -203,7 +270,6 @@ out:
                ima_cache_flags(iint, func);
        }
        ima_set_cache_status(iint, func, status);
-       kfree(xattr_value);
        return status;
 }
 
@@ -219,7 +285,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
        if (iint->flags & IMA_DIGSIG)
                return;
 
-       rc = ima_collect_measurement(iint, file);
+       rc = ima_collect_measurement(iint, file, NULL, NULL);
        if (rc < 0)
                return;
 
@@ -315,3 +381,14 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name)
        }
        return result;
 }
+
+#ifdef CONFIG_IMA_TRUSTED_KEYRING
+static int __init init_ima_keyring(void) /* late initcall: set up the IMA keyring */
+{
+       int ret;
+
+       ret = integrity_init_keyring(INTEGRITY_KEYRING_IMA); /* NOTE(review): ret is never examined */
+       return 0; /* always reports success, whatever integrity_init_keyring() returned */
+}
+late_initcall(init_ima_keyring);
+#endif
index a02e0791cf15c7add98bd922ebcc08cd3db0f725..676e0292dfecf6744b720b8a7e103415b924cc37 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <crypto/hash.h>
+#include <crypto/hash_info.h>
 #include "ima.h"
 
 static struct crypto_shash *ima_shash_tfm;
@@ -28,31 +29,58 @@ int ima_init_crypto(void)
 {
        long rc;
 
-       ima_shash_tfm = crypto_alloc_shash(ima_hash, 0, 0);
+       ima_shash_tfm = crypto_alloc_shash(hash_algo_name[ima_hash_algo], 0, 0);
        if (IS_ERR(ima_shash_tfm)) {
                rc = PTR_ERR(ima_shash_tfm);
-               pr_err("Can not allocate %s (reason: %ld)\n", ima_hash, rc);
+               pr_err("Can not allocate %s (reason: %ld)\n",
+                      hash_algo_name[ima_hash_algo], rc);
                return rc;
        }
        return 0;
 }
 
+static struct crypto_shash *ima_alloc_tfm(enum hash_algo algo) /* pick the transform to use for algo */
+{
+       struct crypto_shash *tfm = ima_shash_tfm; /* default: the shared transform */
+       int rc;
+
+       if (algo != ima_hash_algo && algo < HASH_ALGO__LAST) { /* ids >= HASH_ALGO__LAST fall back to the shared transform */
+               tfm = crypto_alloc_shash(hash_algo_name[algo], 0, 0);
+               if (IS_ERR(tfm)) {
+                       rc = PTR_ERR(tfm);
+                       pr_err("Can not allocate %s (reason: %d)\n",
+                              hash_algo_name[algo], rc);
+               }
+       }
+       return tfm; /* may be an ERR_PTR value; check with IS_ERR() */
+}
+
+static void ima_free_tfm(struct crypto_shash *tfm) /* counterpart of ima_alloc_tfm() */
+{
+       if (tfm != ima_shash_tfm) /* the shared default transform is never freed here */
+               crypto_free_shash(tfm);
+}
+
 /*
  * Calculate the MD5/SHA1 file digest
  */
-int ima_calc_file_hash(struct file *file, char *digest)
+static int ima_calc_file_hash_tfm(struct file *file,
+                                 struct ima_digest_data *hash,
+                                 struct crypto_shash *tfm)
 {
        loff_t i_size, offset = 0;
        char *rbuf;
        int rc, read = 0;
        struct {
                struct shash_desc shash;
-               char ctx[crypto_shash_descsize(ima_shash_tfm)];
+               char ctx[crypto_shash_descsize(tfm)];
        } desc;
 
-       desc.shash.tfm = ima_shash_tfm;
+       desc.shash.tfm = tfm;
        desc.shash.flags = 0;
 
+       hash->length = crypto_shash_digestsize(tfm);
+
        rc = crypto_shash_init(&desc.shash);
        if (rc != 0)
                return rc;
@@ -85,27 +113,83 @@ int ima_calc_file_hash(struct file *file, char *digest)
        }
        kfree(rbuf);
        if (!rc)
-               rc = crypto_shash_final(&desc.shash, digest);
+               rc = crypto_shash_final(&desc.shash, hash->digest);
        if (read)
                file->f_mode &= ~FMODE_READ;
 out:
        return rc;
 }
 
+int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) /* digest of file using hash->algo; 0 or error code */
+{
+       struct crypto_shash *tfm;
+       int rc;
+
+       tfm = ima_alloc_tfm(hash->algo); /* shared default transform or a newly allocated one */
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       rc = ima_calc_file_hash_tfm(file, hash, tfm); /* fills hash->length and hash->digest */
+
+       ima_free_tfm(tfm); /* no-op when tfm is the shared default */
+
+       return rc;
+}
+
 /*
- * Calculate the hash of a given buffer
+ * Calculate the hash of template data
  */
-int ima_calc_buffer_hash(const void *data, int len, char *digest)
+static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data,
+                                        int num_fields,
+                                        struct ima_digest_data *hash,
+                                        struct crypto_shash *tfm)
 {
        struct {
                struct shash_desc shash;
-               char ctx[crypto_shash_descsize(ima_shash_tfm)];
+               char ctx[crypto_shash_descsize(tfm)];
        } desc;
+       int rc, i;
 
-       desc.shash.tfm = ima_shash_tfm;
+       desc.shash.tfm = tfm;
        desc.shash.flags = 0;
 
-       return crypto_shash_digest(&desc.shash, data, len, digest);
+       hash->length = crypto_shash_digestsize(tfm);
+
+       rc = crypto_shash_init(&desc.shash);
+       if (rc != 0)
+               return rc;
+
+       for (i = 0; i < num_fields; i++) {
+               rc = crypto_shash_update(&desc.shash,
+                                        (const u8 *) &field_data[i].len,
+                                        sizeof(field_data[i].len));
+               rc = crypto_shash_update(&desc.shash, field_data[i].data,
+                                        field_data[i].len);
+               if (rc)
+                       break;
+       }
+
+       if (!rc)
+               rc = crypto_shash_final(&desc.shash, hash->digest);
+
+       return rc;
+}
+
+int ima_calc_field_array_hash(struct ima_field_data *field_data, int num_fields,
+                             struct ima_digest_data *hash) /* digest of the template fields using hash->algo; 0 or error code */
+{
+       struct crypto_shash *tfm;
+       int rc;
+
+       tfm = ima_alloc_tfm(hash->algo); /* shared default transform or a newly allocated one */
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       rc = ima_calc_field_array_hash_tfm(field_data, num_fields, hash, tfm); /* fills hash->length and hash->digest */
+
+       ima_free_tfm(tfm); /* no-op when tfm is the shared default */
+
+       return rc;
 }
 
 static void __init ima_pcrread(int idx, u8 *pcr)
@@ -120,16 +204,17 @@ static void __init ima_pcrread(int idx, u8 *pcr)
 /*
  * Calculate the boot aggregate hash
  */
-int __init ima_calc_boot_aggregate(char *digest)
+static int __init ima_calc_boot_aggregate_tfm(char *digest,
+                                             struct crypto_shash *tfm)
 {
-       u8 pcr_i[IMA_DIGEST_SIZE];
+       u8 pcr_i[TPM_DIGEST_SIZE];
        int rc, i;
        struct {
                struct shash_desc shash;
-               char ctx[crypto_shash_descsize(ima_shash_tfm)];
+               char ctx[crypto_shash_descsize(tfm)];
        } desc;
 
-       desc.shash.tfm = ima_shash_tfm;
+       desc.shash.tfm = tfm;
        desc.shash.flags = 0;
 
        rc = crypto_shash_init(&desc.shash);
@@ -140,9 +225,26 @@ int __init ima_calc_boot_aggregate(char *digest)
        for (i = TPM_PCR0; i < TPM_PCR8; i++) {
                ima_pcrread(i, pcr_i);
                /* now accumulate with current aggregate */
-               rc = crypto_shash_update(&desc.shash, pcr_i, IMA_DIGEST_SIZE);
+               rc = crypto_shash_update(&desc.shash, pcr_i, TPM_DIGEST_SIZE);
        }
        if (!rc)
                crypto_shash_final(&desc.shash, digest);
        return rc;
 }
+
+int __init ima_calc_boot_aggregate(struct ima_digest_data *hash) /* boot aggregate digest using hash->algo; 0 or error code */
+{
+       struct crypto_shash *tfm;
+       int rc;
+
+       tfm = ima_alloc_tfm(hash->algo); /* shared default transform or a newly allocated one */
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+
+       hash->length = crypto_shash_digestsize(tfm); /* set here; the _tfm helper only receives the digest buffer */
+       rc = ima_calc_boot_aggregate_tfm(hash->digest, tfm);
+
+       ima_free_tfm(tfm); /* no-op when tfm is the shared default */
+
+       return rc;
+}
index 38477c9c3415cd9af47ec402e9694056a9ca3c22..d47a7c86a21d0d94f6c41933fd18311e553dafce 100644 (file)
@@ -88,8 +88,7 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
         * against concurrent list-extension
         */
        rcu_read_lock();
-       qe = list_entry_rcu(qe->later.next,
-                           struct ima_queue_entry, later);
+       qe = list_entry_rcu(qe->later.next, struct ima_queue_entry, later);
        rcu_read_unlock();
        (*pos)++;
 
@@ -100,7 +99,7 @@ static void ima_measurements_stop(struct seq_file *m, void *v)
 {
 }
 
-static void ima_putc(struct seq_file *m, void *data, int datalen)
+void ima_putc(struct seq_file *m, void *data, int datalen)
 {
        while (datalen--)
                seq_putc(m, *(char *)data++);
@@ -111,6 +110,7 @@ static void ima_putc(struct seq_file *m, void *data, int datalen)
  *       char[20]=template digest
  *       32bit-le=template name size
  *       char[n]=template name
+ *       [eventdata length]
  *       eventdata[n]=template specific data
  */
 static int ima_measurements_show(struct seq_file *m, void *v)
@@ -120,6 +120,7 @@ static int ima_measurements_show(struct seq_file *m, void *v)
        struct ima_template_entry *e;
        int namelen;
        u32 pcr = CONFIG_IMA_MEASURE_PCR_IDX;
+       int i;
 
        /* get entry */
        e = qe->entry;
@@ -134,18 +135,25 @@ static int ima_measurements_show(struct seq_file *m, void *v)
        ima_putc(m, &pcr, sizeof pcr);
 
        /* 2nd: template digest */
-       ima_putc(m, e->digest, IMA_DIGEST_SIZE);
+       ima_putc(m, e->digest, TPM_DIGEST_SIZE);
 
        /* 3rd: template name size */
-       namelen = strlen(e->template_name);
+       namelen = strlen(e->template_desc->name);
        ima_putc(m, &namelen, sizeof namelen);
 
        /* 4th:  template name */
-       ima_putc(m, (void *)e->template_name, namelen);
+       ima_putc(m, e->template_desc->name, namelen);
+
+       /* 5th:  template length (except for 'ima' template) */
+       if (strcmp(e->template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0)
+               ima_putc(m, &e->template_data_len,
+                        sizeof(e->template_data_len));
 
-       /* 5th:  template specific data */
-       ima_template_show(m, (struct ima_template_data *)&e->template,
-                         IMA_SHOW_BINARY);
+       /* 6th:  template specific data */
+       for (i = 0; i < e->template_desc->num_fields; i++) {
+               e->template_desc->fields[i]->field_show(m, IMA_SHOW_BINARY,
+                                                       &e->template_data[i]);
+       }
        return 0;
 }
 
@@ -168,41 +176,21 @@ static const struct file_operations ima_measurements_ops = {
        .release = seq_release,
 };
 
-static void ima_print_digest(struct seq_file *m, u8 *digest)
+void ima_print_digest(struct seq_file *m, u8 *digest, int size)
 {
        int i;
 
-       for (i = 0; i < IMA_DIGEST_SIZE; i++)
+       for (i = 0; i < size; i++)
                seq_printf(m, "%02x", *(digest + i));
 }
 
-void ima_template_show(struct seq_file *m, void *e, enum ima_show_type show)
-{
-       struct ima_template_data *entry = e;
-       int namelen;
-
-       switch (show) {
-       case IMA_SHOW_ASCII:
-               ima_print_digest(m, entry->digest);
-               seq_printf(m, " %s\n", entry->file_name);
-               break;
-       case IMA_SHOW_BINARY:
-               ima_putc(m, entry->digest, IMA_DIGEST_SIZE);
-
-               namelen = strlen(entry->file_name);
-               ima_putc(m, &namelen, sizeof namelen);
-               ima_putc(m, entry->file_name, namelen);
-       default:
-               break;
-       }
-}
-
 /* print in ascii */
 static int ima_ascii_measurements_show(struct seq_file *m, void *v)
 {
        /* the list never shrinks, so we don't need a lock here */
        struct ima_queue_entry *qe = v;
        struct ima_template_entry *e;
+       int i;
 
        /* get entry */
        e = qe->entry;
@@ -213,14 +201,21 @@ static int ima_ascii_measurements_show(struct seq_file *m, void *v)
        seq_printf(m, "%2d ", CONFIG_IMA_MEASURE_PCR_IDX);
 
        /* 2nd: SHA1 template hash */
-       ima_print_digest(m, e->digest);
+       ima_print_digest(m, e->digest, TPM_DIGEST_SIZE);
 
        /* 3th:  template name */
-       seq_printf(m, " %s ", e->template_name);
+       seq_printf(m, " %s", e->template_desc->name);
 
        /* 4th:  template specific data */
-       ima_template_show(m, (struct ima_template_data *)&e->template,
-                         IMA_SHOW_ASCII);
+       for (i = 0; i < e->template_desc->num_fields; i++) {
+               seq_puts(m, " ");
+               if (e->template_data[i].len == 0)
+                       continue;
+
+               e->template_desc->fields[i]->field_show(m, IMA_SHOW_ASCII,
+                                                       &e->template_data[i]);
+       }
+       seq_puts(m, "\n");
        return 0;
 }
 
index 162ea723db3df5f07a2dd23f4bcc66e1af03a898..15f34bd40abed1530216be53c59658b6245d280b 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <crypto/hash_info.h>
 #include "ima.h"
 
 /* name for boot aggregate entry */
@@ -42,28 +43,38 @@ int ima_used_chip;
 static void __init ima_add_boot_aggregate(void)
 {
        struct ima_template_entry *entry;
+       struct integrity_iint_cache tmp_iint, *iint = &tmp_iint;
        const char *op = "add_boot_aggregate";
        const char *audit_cause = "ENOMEM";
        int result = -ENOMEM;
-       int violation = 1;
+       int violation = 0;
+       struct {
+               struct ima_digest_data hdr;
+               char digest[TPM_DIGEST_SIZE];
+       } hash;
 
-       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-       if (!entry)
-               goto err_out;
+       memset(iint, 0, sizeof(*iint));
+       memset(&hash, 0, sizeof(hash));
+       iint->ima_hash = &hash.hdr;
+       iint->ima_hash->algo = HASH_ALGO_SHA1;
+       iint->ima_hash->length = SHA1_DIGEST_SIZE;
 
-       memset(&entry->template, 0, sizeof(entry->template));
-       strncpy(entry->template.file_name, boot_aggregate_name,
-               IMA_EVENT_NAME_LEN_MAX);
        if (ima_used_chip) {
-               violation = 0;
-               result = ima_calc_boot_aggregate(entry->template.digest);
+               result = ima_calc_boot_aggregate(&hash.hdr);
                if (result < 0) {
                        audit_cause = "hashing_error";
                        kfree(entry);
                        goto err_out;
                }
        }
-       result = ima_store_template(entry, violation, NULL);
+
+       result = ima_alloc_init_template(iint, NULL, boot_aggregate_name,
+                                        NULL, 0, &entry);
+       if (result < 0)
+               return;
+
+       result = ima_store_template(entry, violation, NULL,
+                                   boot_aggregate_name);
        if (result < 0)
                kfree(entry);
        return;
@@ -74,7 +85,7 @@ err_out:
 
 int __init ima_init(void)
 {
-       u8 pcr_i[IMA_DIGEST_SIZE];
+       u8 pcr_i[TPM_DIGEST_SIZE];
        int rc;
 
        ima_used_chip = 0;
@@ -88,6 +99,10 @@ int __init ima_init(void)
        rc = ima_init_crypto();
        if (rc)
                return rc;
+       rc = ima_init_template();
+       if (rc != 0)
+               return rc;
+
        ima_add_boot_aggregate();       /* boot aggregate must be first entry */
        ima_init_policy();
 
index e9508d5bbfcff7ee087d069697c6088da7da0210..149ee1119f87ba37c7673efdd7fd4d0e50cae809 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/ima.h>
+#include <crypto/hash_info.h>
 
 #include "ima.h"
 
@@ -35,11 +36,33 @@ int ima_appraise = IMA_APPRAISE_ENFORCE;
 int ima_appraise;
 #endif
 
-char *ima_hash = "sha1";
+int ima_hash_algo = HASH_ALGO_SHA1;
+static int hash_setup_done;
+
 static int __init hash_setup(char *str)
 {
-       if (strncmp(str, "md5", 3) == 0)
-               ima_hash = "md5";
+       struct ima_template_desc *template_desc = ima_template_desc_current();
+       int i;
+
+       if (hash_setup_done)
+               return 1;
+
+       if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) {
+               if (strncmp(str, "sha1", 4) == 0)
+                       ima_hash_algo = HASH_ALGO_SHA1;
+               else if (strncmp(str, "md5", 3) == 0)
+                       ima_hash_algo = HASH_ALGO_MD5;
+               goto out;
+       }
+
+       for (i = 0; i < HASH_ALGO__LAST; i++) {
+               if (strcmp(str, hash_algo_name[i]) == 0) {
+                       ima_hash_algo = i;
+                       break;
+               }
+       }
+out:
+       hash_setup_done = 1;
        return 1;
 }
 __setup("ima_hash=", hash_setup);
@@ -92,10 +115,9 @@ out:
                pathname = dentry->d_name.name;
 
        if (send_tomtou)
-               ima_add_violation(inode, pathname,
-                                 "invalid_pcr", "ToMToU");
+               ima_add_violation(file, pathname, "invalid_pcr", "ToMToU");
        if (send_writers)
-               ima_add_violation(inode, pathname,
+               ima_add_violation(file, pathname,
                                  "invalid_pcr", "open_writers");
        kfree(pathbuf);
 }
@@ -144,9 +166,12 @@ static int process_measurement(struct file *file, const char *filename,
 {
        struct inode *inode = file_inode(file);
        struct integrity_iint_cache *iint;
+       struct ima_template_desc *template_desc = ima_template_desc_current();
        char *pathbuf = NULL;
        const char *pathname = NULL;
        int rc = -ENOMEM, action, must_appraise, _func;
+       struct evm_ima_xattr_data *xattr_value = NULL, **xattr_ptr = NULL;
+       int xattr_len = 0;
 
        if (!ima_initialized || !S_ISREG(inode->i_mode))
                return 0;
@@ -185,7 +210,13 @@ static int process_measurement(struct file *file, const char *filename,
                goto out_digsig;
        }
 
-       rc = ima_collect_measurement(iint, file);
+       if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) {
+               if (action & IMA_APPRAISE_SUBMASK)
+                       xattr_ptr = &xattr_value;
+       } else
+               xattr_ptr = &xattr_value;
+
+       rc = ima_collect_measurement(iint, file, xattr_ptr, &xattr_len);
        if (rc != 0)
                goto out_digsig;
 
@@ -194,9 +225,11 @@ static int process_measurement(struct file *file, const char *filename,
                pathname = (const char *)file->f_dentry->d_name.name;
 
        if (action & IMA_MEASURE)
-               ima_store_measurement(iint, file, pathname);
+               ima_store_measurement(iint, file, pathname,
+                                     xattr_value, xattr_len);
        if (action & IMA_APPRAISE_SUBMASK)
-               rc = ima_appraise_measurement(_func, iint, file, pathname);
+               rc = ima_appraise_measurement(_func, iint, file, pathname,
+                                             xattr_value, xattr_len);
        if (action & IMA_AUDIT)
                ima_audit_measurement(iint, pathname);
        kfree(pathbuf);
@@ -205,6 +238,7 @@ out_digsig:
                rc = -EACCES;
 out:
        mutex_unlock(&inode->i_mutex);
+       kfree(xattr_value);
        if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE))
                return -EACCES;
        return 0;
@@ -244,9 +278,9 @@ int ima_file_mmap(struct file *file, unsigned long prot)
 int ima_bprm_check(struct linux_binprm *bprm)
 {
        return process_measurement(bprm->file,
-                                (strcmp(bprm->filename, bprm->interp) == 0) ?
-                                bprm->filename : bprm->interp,
-                                MAY_EXEC, BPRM_CHECK);
+                                  (strcmp(bprm->filename, bprm->interp) == 0) ?
+                                  bprm->filename : bprm->interp,
+                                  MAY_EXEC, BPRM_CHECK);
 }
 
 /**
@@ -263,8 +297,8 @@ int ima_file_check(struct file *file, int mask)
 {
        ima_rdwr_violation_check(file);
        return process_measurement(file, NULL,
-                                mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
-                                FILE_CHECK);
+                                  mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
+                                  FILE_CHECK);
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
 
@@ -294,6 +328,7 @@ static int __init init_ima(void)
 {
        int error;
 
+       hash_setup(CONFIG_IMA_DEFAULT_HASH);
        error = ima_init();
        if (!error)
                ima_initialized = 1;
index 399433ad614e0d26cc05588014991632a80865b0..a9c3d3cd1990d506a431614ffcf1d63ee53434a3 100644 (file)
@@ -73,7 +73,6 @@ static struct ima_rule_entry default_rules[] = {
        {.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC},
        {.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC},
        {.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC},
-       {.action = DONT_MEASURE,.fsmagic = RAMFS_MAGIC,.flags = IMA_FSMAGIC},
        {.action = DONT_MEASURE,.fsmagic = DEVPTS_SUPER_MAGIC,.flags = IMA_FSMAGIC},
        {.action = DONT_MEASURE,.fsmagic = BINFMTFS_MAGIC,.flags = IMA_FSMAGIC},
        {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC},
index ff63fe00c19554921b172425d5261495a98a6bcf..d85e99761f4fc66afa251bb2bd657062d84d9b77 100644 (file)
@@ -50,7 +50,7 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value)
        key = ima_hash_key(digest_value);
        rcu_read_lock();
        hlist_for_each_entry_rcu(qe, &ima_htable.queue[key], hnext) {
-               rc = memcmp(qe->entry->digest, digest_value, IMA_DIGEST_SIZE);
+               rc = memcmp(qe->entry->digest, digest_value, TPM_DIGEST_SIZE);
                if (rc == 0) {
                        ret = qe;
                        break;
@@ -104,9 +104,10 @@ static int ima_pcr_extend(const u8 *hash)
  * and extend the pcr.
  */
 int ima_add_template_entry(struct ima_template_entry *entry, int violation,
-                          const char *op, struct inode *inode)
+                          const char *op, struct inode *inode,
+                          const unsigned char *filename)
 {
-       u8 digest[IMA_DIGEST_SIZE];
+       u8 digest[TPM_DIGEST_SIZE];
        const char *audit_cause = "hash_added";
        char tpm_audit_cause[AUDIT_CAUSE_LEN_MAX];
        int audit_info = 1;
@@ -141,8 +142,7 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
        }
 out:
        mutex_unlock(&ima_extend_list_mutex);
-       integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
-                           entry->template.file_name,
+       integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename,
                            op, audit_cause, result, audit_info);
        return result;
 }
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
new file mode 100644 (file)
index 0000000..4e5da99
--- /dev/null
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2013 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Author: Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_template.c
+ *      Helpers to manage template descriptors.
+ */
+#include <crypto/hash_info.h>
+
+#include "ima.h"
+#include "ima_template_lib.h"
+
+/*
+ * Built-in template descriptors.  Each entry pairs a template name with a
+ * format string: a '|'-separated list of field identifiers, each of which
+ * has to match a field_id in supported_fields[].  The fields/num_fields
+ * members are filled in later by init_defined_templates().
+ */
+static struct ima_template_desc defined_templates[] = {
+       {.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT},
+       {.name = "ima-ng",.fmt = "d-ng|n-ng"},
+       {.name = "ima-sig",.fmt = "d-ng|n-ng|sig"},
+};
+
+/*
+ * Template fields that a format string may reference.  Each entry binds a
+ * field identifier to the callback that fills in the field's data
+ * (field_init) and to the callback that prints it (field_show).
+ */
+static struct ima_template_field supported_fields[] = {
+       {.field_id = "d",.field_init = ima_eventdigest_init,
+        .field_show = ima_show_template_digest},
+       {.field_id = "n",.field_init = ima_eventname_init,
+        .field_show = ima_show_template_string},
+       {.field_id = "d-ng",.field_init = ima_eventdigest_ng_init,
+        .field_show = ima_show_template_digest_ng},
+       {.field_id = "n-ng",.field_init = ima_eventname_ng_init,
+        .field_show = ima_show_template_string},
+       {.field_id = "sig",.field_init = ima_eventsig_init,
+        .field_show = ima_show_template_sig},
+};
+
+/* Template currently selected; NULL until one has been chosen. */
+static struct ima_template_desc *ima_template;
+/* Forward declaration: ima_template_setup() below uses the lookup helper. */
+static struct ima_template_desc *lookup_template_desc(const char *name);
+
+/*
+ * Handle the "ima_template=" boot parameter: select the template named by
+ * @str.  An unknown name, or the 'ima' template combined with a hash
+ * algorithm other than SHA1/MD5, leaves the current selection untouched.
+ * Always returns 1.
+ */
+static int __init ima_template_setup(char *str)
+{
+       struct ima_template_desc *desc = lookup_template_desc(str);
+       bool wants_ima, algo_fits;
+
+       /* Unknown name: nothing is selected here. */
+       if (desc == NULL)
+               return 1;
+
+       /* The 'ima' template is only accepted with SHA1 or MD5. */
+       wants_ima = strlen(str) == 3 &&
+                   strcmp(str, IMA_TEMPLATE_IMA_NAME) == 0;
+       algo_fits = ima_hash_algo == HASH_ALGO_SHA1 ||
+                   ima_hash_algo == HASH_ALGO_MD5;
+       if (wants_ima && !algo_fits) {
+               pr_err("IMA: template does not support hash alg\n");
+               return 1;
+       }
+
+       ima_template = desc;
+       return 1;
+}
+__setup("ima_template=", ima_template_setup);
+
+/*
+ * Return the defined_templates[] entry whose name equals @name, or NULL
+ * when no such template is defined.
+ */
+static struct ima_template_desc *lookup_template_desc(const char *name)
+{
+       struct ima_template_desc *desc = defined_templates;
+       struct ima_template_desc *end =
+           defined_templates + ARRAY_SIZE(defined_templates);
+
+       for (; desc < end; desc++)
+               if (!strcmp(desc->name, name))
+                       return desc;
+
+       return NULL;
+}
+
+/*
+ * Return the supported_fields[] entry whose identifier matches @field_id
+ * (compared over at most IMA_TEMPLATE_FIELD_ID_MAX_LEN characters), or NULL.
+ */
+static struct ima_template_field *lookup_template_field(const char *field_id)
+{
+       struct ima_template_field *field = supported_fields;
+       int remaining = ARRAY_SIZE(supported_fields);
+
+       while (remaining-- > 0) {
+               if (!strncmp(field->field_id, field_id,
+                            IMA_TEMPLATE_FIELD_ID_MAX_LEN))
+                       return field;
+               field++;
+       }
+       return NULL;
+}
+
+/*
+ * Return the number of fields described by @template_fmt, i.e. the number
+ * of '|' separators plus one.
+ */
+static int template_fmt_size(char *template_fmt)
+{
+       int separators = 0;
+       char *p;
+
+       for (p = template_fmt; *p != '\0'; p++)
+               if (*p == '|')
+                       separators++;
+
+       return separators + 1;
+}
+
+/*
+ * Translate a template format string into an array of field descriptors.
+ * @template_fmt:  '|'-separated list of field identifiers (left unmodified)
+ * @fields:        receives a newly allocated array of pointers into
+ *                 supported_fields[]; set to NULL on allocation or lookup
+ *                 failure
+ * @num_fields:    receives the number of entries stored in *@fields
+ *
+ * Returns 0 on success, -EINVAL if the format names more than
+ * IMA_TEMPLATE_NUM_FIELDS_MAX fields, -ENOMEM on allocation failure and
+ * -ENOENT if a field identifier is not supported.
+ */
+static int template_desc_init_fields(char *template_fmt,
+                                    struct ima_template_field ***fields,
+                                    int *num_fields)
+{
+       char *c, *template_fmt_copy, *template_fmt_ptr;
+       int template_num_fields = template_fmt_size(template_fmt);
+       int i, result = 0;
+
+       if (template_num_fields > IMA_TEMPLATE_NUM_FIELDS_MAX)
+               return -EINVAL;
+
+       /* one pointer per field; kcalloc() zeroes and checks the product */
+       *fields = kcalloc(template_num_fields, sizeof(**fields), GFP_KERNEL);
+       if (*fields == NULL)
+               return -ENOMEM;
+
+       /*
+        * strsep() overwrites each separator with '\0'.  The format strings
+        * of the built-in templates are string literals, so tokenise a
+        * private copy instead of the caller's buffer.
+        */
+       template_fmt_copy = kstrdup(template_fmt, GFP_KERNEL);
+       if (template_fmt_copy == NULL) {
+               result = -ENOMEM;
+               goto out;
+       }
+
+       template_fmt_ptr = template_fmt_copy;
+       for (i = 0; (c = strsep(&template_fmt_ptr, "|")) != NULL &&
+            i < template_num_fields; i++) {
+               struct ima_template_field *f = lookup_template_field(c);
+
+               if (!f) {
+                       result = -ENOENT;
+                       goto out;
+               }
+               (*fields)[i] = f;
+       }
+       *num_fields = i;
+out:
+       if (result < 0) {
+               kfree(*fields);
+               *fields = NULL;
+       }
+       kfree(template_fmt_copy);       /* kfree(NULL) is a no-op */
+       return result;
+}
+
+/*
+ * Resolve the format string of every entry in defined_templates[] into its
+ * field array.  Stops at the first failure and returns its error code;
+ * returns 0 when all templates were initialised.
+ */
+static int init_defined_templates(void)
+{
+       struct ima_template_desc *desc;
+       int rc = 0;
+       int idx;
+
+       for (idx = 0; idx < ARRAY_SIZE(defined_templates); idx++) {
+               desc = &defined_templates[idx];
+               rc = template_desc_init_fields(desc->fmt, &desc->fields,
+                                              &desc->num_fields);
+               if (rc < 0)
+                       break;
+       }
+       return rc;
+}
+
+/*
+ * Return the template in use.  If none has been selected yet, fall back to
+ * the one named by CONFIG_IMA_DEFAULT_TEMPLATE and remember that choice.
+ */
+struct ima_template_desc *ima_template_desc_current(void)
+{
+       if (ima_template)
+               return ima_template;
+
+       ima_template = lookup_template_desc(CONFIG_IMA_DEFAULT_TEMPLATE);
+       return ima_template;
+}
+
+/*
+ * Initialise all defined templates.  Returns 0 on success or the negative
+ * error code reported by init_defined_templates().
+ */
+int ima_init_template(void)
+{
+       int rc = init_defined_templates();
+
+       return rc < 0 ? rc : 0;
+}
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
new file mode 100644 (file)
index 0000000..6d66ad6
--- /dev/null
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) 2013 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Author: Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_template_lib.c
+ *      Library of supported template fields.
+ */
+#include <crypto/hash_info.h>
+
+#include "ima_template_lib.h"
+
+/* True when @algo is SHA1 or MD5, the only algorithms accepted here. */
+static bool ima_template_hash_algo_allowed(u8 algo)
+{
+       return algo == HASH_ALGO_SHA1 || algo == HASH_ALGO_MD5;
+}
+
+/*
+ * Layout of the bytes stored in a struct ima_field_data; selects the buffer
+ * size used when writing a field and the way it is printed in ASCII.
+ */
+enum data_formats {
+       DATA_FMT_DIGEST = 0,            /* raw digest bytes, printed as hex */
+       DATA_FMT_DIGEST_WITH_ALGO,      /* [<hash algo>] ':' '\0' digest */
+       DATA_FMT_EVENT_NAME,            /* name in a fixed-size buffer */
+       DATA_FMT_STRING,                /* string plus terminating '\0' */
+       DATA_FMT_HEX                    /* arbitrary bytes, printed as hex */
+};
+
+/*
+ * Copy @datalen bytes from @data into a freshly allocated, zero-filled
+ * buffer and attach it to @field_data.
+ *
+ * The buffer size depends on @datafmt: IMA_EVENT_NAME_LEN_MAX + 1 bytes for
+ * event names, @datalen + 1 bytes for strings and @datalen bytes otherwise.
+ * field_data->len is set to the buffer size.
+ *
+ * Returns 0 on success or -ENOMEM if the buffer cannot be allocated.
+ */
+static int ima_write_template_field_data(const void *data, const u32 datalen,
+                                        enum data_formats datafmt,
+                                        struct ima_field_data *field_data)
+{
+       bool is_text = datafmt == DATA_FMT_EVENT_NAME ||
+                      datafmt == DATA_FMT_STRING;
+       u32 buflen, pos;
+       u8 *buf;
+
+       if (datafmt == DATA_FMT_EVENT_NAME)
+               buflen = IMA_EVENT_NAME_LEN_MAX + 1;
+       else if (datafmt == DATA_FMT_STRING)
+               buflen = datalen + 1;
+       else
+               buflen = datalen;
+
+       buf = kzalloc(buflen, GFP_KERNEL);
+       if (buf == NULL)
+               return -ENOMEM;
+
+       memcpy(buf, data, datalen);
+
+       /*
+        * In event names and strings, replace every space with an
+        * underscore.  The space is the field delimiter of the ASCII
+        * measurements list, so a file name containing spaces (or ending
+        * with the suffix ' (deleted)') would otherwise be parsed as
+        * several template fields.
+        */
+       for (pos = 0; is_text && pos < datalen; pos++)
+               if (buf[pos] == ' ')
+                       buf[pos] = '_';
+
+       field_data->data = buf;
+       field_data->len = buflen;
+       return 0;
+}
+
+/*
+ * Print one template field in ASCII form.
+ * @m:           seq_file receiving the output
+ * @show:        display mode requested by the caller (not used here)
+ * @datafmt:     how the bytes in @field_data are to be interpreted
+ * @field_data:  field buffer and its length
+ *
+ * Digests and hex data are printed as lowercase hex.  A digest with
+ * algorithm is laid out as "[<hash algo>]:\0<digest>": the prefix is printed
+ * as a string, followed by the digest in hex.  Strings are printed as-is.
+ */
+static void ima_show_template_data_ascii(struct seq_file *m,
+                                        enum ima_show_type show,
+                                        enum data_formats datafmt,
+                                        struct ima_field_data *field_data)
+{
+       u8 *buf_ptr = field_data->data;
+       /* len is 32 bits wide; a u8 here would truncate fields > 255 bytes */
+       u32 buflen = field_data->len;
+       u32 prefix_len;
+
+       switch (datafmt) {
+       case DATA_FMT_DIGEST_WITH_ALGO:
+               buf_ptr = strnchr(field_data->data, buflen, ':');
+               if (!buf_ptr)   /* no separator: nothing sensible to print */
+                       break;
+               if (buf_ptr != field_data->data)
+                       seq_printf(m, "%s", field_data->data);
+
+               /* skip ':' and '\0' */
+               buf_ptr += 2;
+               prefix_len = buf_ptr - field_data->data;
+               if (prefix_len > buflen)        /* field ends inside prefix */
+                       break;
+               buflen -= prefix_len;
+               /* fall through */
+       case DATA_FMT_DIGEST:
+       case DATA_FMT_HEX:
+               if (!buflen)
+                       break;
+               ima_print_digest(m, buf_ptr, buflen);
+               break;
+       case DATA_FMT_STRING:
+               seq_printf(m, "%s", buf_ptr);
+               break;
+       default:
+               break;
+       }
+}
+
+/*
+ * Emit one template field in binary form: the 32-bit field length followed,
+ * when the length is non-zero, by the field bytes.  @show and @datafmt are
+ * not used.
+ */
+static void ima_show_template_data_binary(struct seq_file *m,
+                                         enum ima_show_type show,
+                                         enum data_formats datafmt,
+                                         struct ima_field_data *field_data)
+{
+       ima_putc(m, &field_data->len, sizeof(u32));
+
+       if (field_data->len != 0)
+               ima_putc(m, field_data->data, field_data->len);
+}
+
+/*
+ * Dispatch to the ASCII or binary printer according to @show; any other
+ * display type prints nothing.
+ */
+static void ima_show_template_field_data(struct seq_file *m,
+                                        enum ima_show_type show,
+                                        enum data_formats datafmt,
+                                        struct ima_field_data *field_data)
+{
+       if (show == IMA_SHOW_ASCII)
+               ima_show_template_data_ascii(m, show, datafmt, field_data);
+       else if (show == IMA_SHOW_BINARY)
+               ima_show_template_data_binary(m, show, datafmt, field_data);
+}
+
+/* field_show callback of the "d" field: print the data as a plain digest. */
+void ima_show_template_digest(struct seq_file *m, enum ima_show_type show,
+                             struct ima_field_data *field_data)
+{
+       ima_show_template_field_data(m, show, DATA_FMT_DIGEST, field_data);
+}
+
+/*
+ * field_show callback of the "d-ng" field: print the data as a digest
+ * preceded by its algorithm prefix.
+ */
+void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show,
+                                struct ima_field_data *field_data)
+{
+       ima_show_template_field_data(m, show, DATA_FMT_DIGEST_WITH_ALGO,
+                                    field_data);
+}
+
+/* field_show callback of the "n" and "n-ng" fields: print the data as a string. */
+void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
+                             struct ima_field_data *field_data)
+{
+       ima_show_template_field_data(m, show, DATA_FMT_STRING, field_data);
+}
+
+/* field_show callback of the "sig" field: print the data as hex bytes. */
+void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
+                          struct ima_field_data *field_data)
+{
+       ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data);
+}
+
+/*
+ * Build the digest field shared by the "d" and "d-ng" template fields.
+ * @digest:      digest bytes, or NULL when recording a violation
+ * @digestsize:  number of bytes in @digest
+ * @hash_algo:   index into hash_algo_name[]; only consulted when
+ *               @size_limit is false
+ * @field_data:  receives the newly allocated field buffer
+ * @size_limit:  true for the plain digest format, false to prepend the
+ *               algorithm prefix
+ *
+ * Returns the result of ima_write_template_field_data().
+ */
+static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 hash_algo,
+                                      struct ima_field_data *field_data,
+                                      bool size_limit)
+{
+       /*
+        * digest formats:
+        *  - DATA_FMT_DIGEST: digest
+        *  - DATA_FMT_DIGEST_WITH_ALGO: [<hash algo>] + ':' + '\0' + digest,
+        *    where <hash algo> is written only when hash_algo is a valid
+        *    index, i.e. smaller than HASH_ALGO__LAST
+        */
+       u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 };
+       enum data_formats fmt = DATA_FMT_DIGEST;
+       u32 offset = 0;
+
+       if (!size_limit) {
+               fmt = DATA_FMT_DIGEST_WITH_ALGO;
+               if (hash_algo < HASH_ALGO__LAST)
+                       offset += snprintf(buffer, CRYPTO_MAX_ALG_NAME + 1,
+                                          "%s", hash_algo_name[hash_algo]);
+               buffer[offset] = ':';
+               /* step over ':' and the '\0' left by the zeroed buffer */
+               offset += 2;
+       }
+
+       if (digest)
+               memcpy(buffer + offset, digest, digestsize);
+       else
+               /*
+                * If digest is NULL, the event being recorded is a violation.
+                * Make room for an all-zero digest by increasing the offset
+                * by IMA_DIGEST_SIZE.
+                */
+               offset += IMA_DIGEST_SIZE;
+
+       return ima_write_template_field_data(buffer, offset + digestsize,
+                                            fmt, field_data);
+}
+
+/*
+ * This function writes the digest of an event (with size limit).
+ *
+ * A NULL @iint means a violation is being recorded and no digest is passed
+ * on.  If the digest cached in @iint was computed with SHA1 or MD5 it is
+ * used directly; otherwise the file hash is recalculated, which requires
+ * @file.  @xattr_value and @xattr_len are not used.
+ *
+ * Returns -EINVAL if a recalculation is needed but @file is NULL, the error
+ * from ima_calc_file_hash() if hashing fails, or otherwise the result of
+ * ima_eventdigest_init_common().
+ */
+int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file,
+                        const unsigned char *filename,
+                        struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                        struct ima_field_data *field_data)
+{
+       /* header followed by room for the largest supported digest */
+       struct {
+               struct ima_digest_data hdr;
+               char digest[IMA_MAX_DIGEST_SIZE];
+       } hash;
+       u8 *cur_digest = NULL;
+       u32 cur_digestsize = 0;
+       struct inode *inode;
+       int result;
+
+       memset(&hash, 0, sizeof(hash));
+
+       if (!iint)              /* recording a violation. */
+               goto out;
+
+       if (ima_template_hash_algo_allowed(iint->ima_hash->algo)) {
+               cur_digest = iint->ima_hash->digest;
+               cur_digestsize = iint->ima_hash->length;
+               goto out;
+       }
+
+       if (!file)              /* missing info to re-calculate the digest */
+               return -EINVAL;
+
+       inode = file_inode(file);
+       /* use the configured algorithm if it is SHA1/MD5, else SHA1 */
+       hash.hdr.algo = ima_template_hash_algo_allowed(ima_hash_algo) ?
+           ima_hash_algo : HASH_ALGO_SHA1;
+       result = ima_calc_file_hash(file, &hash.hdr);
+       if (result) {
+               integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode,
+                                   filename, "collect_data",
+                                   "failed", result, 0);
+               return result;
+       }
+       cur_digest = hash.hdr.digest;
+       cur_digestsize = hash.hdr.length;
+out:
+       /* size_limit is true, so the hash_algo argument (-1) is ignored */
+       return ima_eventdigest_init_common(cur_digest, cur_digestsize, -1,
+                                          field_data, true);
+}
+
+/*
+ * This function writes the digest of an event (without size limit).
+ *
+ * The digest, its length and its algorithm are taken from @iint.  When
+ * @iint is NULL a violation is being recorded: no digest is passed on and
+ * the algorithm is set to HASH_ALGO__LAST.
+ */
+int ima_eventdigest_ng_init(struct integrity_iint_cache *iint,
+                           struct file *file, const unsigned char *filename,
+                           struct evm_ima_xattr_data *xattr_value,
+                           int xattr_len, struct ima_field_data *field_data)
+{
+       u8 *digest = NULL;
+       u8 algo = HASH_ALGO__LAST;
+       u32 digest_len = 0;
+
+       if (iint != NULL) {
+               digest = iint->ima_hash->digest;
+               digest_len = iint->ima_hash->length;
+               algo = iint->ima_hash->algo;
+       }
+
+       return ima_eventdigest_init_common(digest, digest_len, algo,
+                                          field_data, false);
+}
+
+/*
+ * Build the event name field shared by the "n" and "n-ng" template fields.
+ * @iint:        not used
+ * @file:        file being measured, may be NULL if @filename is given
+ * @filename:    name to record, may be NULL if @file is given
+ * @field_data:  receives the newly allocated field buffer
+ * @size_limit:  true to keep the name within IMA_EVENT_NAME_LEN_MAX
+ *
+ * @filename is preferred.  With @size_limit set and a @filename that is too
+ * long, the dentry name of @file is used instead; if @file is NULL,
+ * @filename is truncated.  Triggers BUG() when both are NULL.
+ *
+ * Returns the result of ima_write_template_field_data().
+ */
+static int ima_eventname_init_common(struct integrity_iint_cache *iint,
+                                    struct file *file,
+                                    const unsigned char *filename,
+                                    struct ima_field_data *field_data,
+                                    bool size_limit)
+{
+       const char *cur_filename = NULL;
+       u32 cur_filename_len = 0;
+       enum data_formats fmt = size_limit ?
+           DATA_FMT_EVENT_NAME : DATA_FMT_STRING;
+
+       BUG_ON(filename == NULL && file == NULL);
+
+       if (filename) {
+               cur_filename = filename;
+               cur_filename_len = strlen(filename);
+
+               /* done unless the name has to be shortened */
+               if (!size_limit || cur_filename_len <= IMA_EVENT_NAME_LEN_MAX)
+                       goto out;
+       }
+
+       if (file) {
+               /* fall back to the last path component */
+               cur_filename = file->f_dentry->d_name.name;
+               cur_filename_len = strlen(cur_filename);
+       } else
+               /*
+                * Truncate filename if the latter is too long and
+                * the file descriptor is not available.
+                */
+               cur_filename_len = IMA_EVENT_NAME_LEN_MAX;
+out:
+       return ima_write_template_field_data(cur_filename, cur_filename_len,
+                                            fmt, field_data);
+}
+
+/*
+ * This function writes the name of an event (with size limit).
+ * It is the field_init callback of the "n" field; @xattr_value and
+ * @xattr_len are not used.
+ */
+int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file,
+                      const unsigned char *filename,
+                      struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                      struct ima_field_data *field_data)
+{
+       return ima_eventname_init_common(iint, file, filename,
+                                        field_data, true);
+}
+
+/*
+ * This function writes the name of an event (without size limit).
+ * It is the field_init callback of the "n-ng" field; @xattr_value and
+ * @xattr_len are not used.
+ */
+int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file,
+                         const unsigned char *filename,
+                         struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                         struct ima_field_data *field_data)
+{
+       return ima_eventname_init_common(iint, file, filename,
+                                        field_data, false);
+}
+
+/*
+ *  ima_eventsig_init - include the file signature as part of the template data
+ *
+ *  The xattr is copied into @field_data only when it is present and of type
+ *  EVM_IMA_XATTR_DIGSIG; otherwise @field_data is left untouched and 0 is
+ *  returned.
+ */
+int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file,
+                     const unsigned char *filename,
+                     struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                     struct ima_field_data *field_data)
+{
+       if (xattr_value == NULL)
+               return 0;
+       if (xattr_value->type != EVM_IMA_XATTR_DIGSIG)
+               return 0;
+
+       return ima_write_template_field_data(xattr_value, xattr_len,
+                                            DATA_FMT_HEX, field_data);
+}
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h
new file mode 100644 (file)
index 0000000..63f6b52
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2013 Politecnico di Torino, Italy
+ *                    TORSEC group -- http://security.polito.it
+ *
+ * Author: Roberto Sassu <roberto.sassu@polito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * File: ima_template_lib.h
+ *      Header for the library of supported template fields.
+ */
+#ifndef __LINUX_IMA_TEMPLATE_LIB_H
+#define __LINUX_IMA_TEMPLATE_LIB_H
+
+#include <linux/seq_file.h>
+#include "ima.h"
+
+/* Display helpers: print one template field to a seq_file. */
+void ima_show_template_digest(struct seq_file *m, enum ima_show_type show,
+                             struct ima_field_data *field_data);
+void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show,
+                                struct ima_field_data *field_data);
+void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
+                             struct ima_field_data *field_data);
+void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
+                          struct ima_field_data *field_data);
+/* Initialisation helpers: all share one signature and fill in @field_data. */
+int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file,
+                        const unsigned char *filename,
+                        struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                        struct ima_field_data *field_data);
+int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file,
+                      const unsigned char *filename,
+                      struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                      struct ima_field_data *field_data);
+int ima_eventdigest_ng_init(struct integrity_iint_cache *iint,
+                           struct file *file, const unsigned char *filename,
+                           struct evm_ima_xattr_data *xattr_value,
+                           int xattr_len, struct ima_field_data *field_data);
+int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file,
+                         const unsigned char *filename,
+                         struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                         struct ima_field_data *field_data);
+int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file,
+                     const unsigned char *filename,
+                     struct evm_ima_xattr_data *xattr_value, int xattr_len,
+                     struct ima_field_data *field_data);
+#endif /* __LINUX_IMA_TEMPLATE_LIB_H */
index c42fb7a70dee78dfdf1ebbc5f1d3a43fb6c6c233..b9e7c133734a2dc5796fe98f5c15f8c81ebc5d26 100644 (file)
@@ -54,25 +54,57 @@ enum evm_ima_xattr_type {
        IMA_XATTR_DIGEST = 0x01,
        EVM_XATTR_HMAC,
        EVM_IMA_XATTR_DIGSIG,
+       IMA_XATTR_DIGEST_NG,
 };
 
 struct evm_ima_xattr_data {
        u8 type;
        u8 digest[SHA1_DIGEST_SIZE];
-}  __attribute__((packed));
+} __packed;
+
+#define IMA_MAX_DIGEST_SIZE    64
+
+struct ima_digest_data {
+       u8 algo;
+       u8 length;
+       union {         /* three overlaid views of the same two bytes */
+               struct {
+                       u8 unused;
+                       u8 type;        /* second byte in the sha1 view */
+               } sha1;
+               struct {
+                       u8 type;        /* first byte in the ng view */
+                       u8 algo;
+               } ng;
+               u8 data[2];
+       } xattr;
+       u8 digest[0];   /* zero-length array: variable-length data follows */
+} __packed;
+
+/*
+ * signature format v2 - for using with asymmetric keys
+ */
+struct signature_v2_hdr {
+       uint8_t type;           /* xattr type */
+       uint8_t version;        /* signature format version */
+       uint8_t hash_algo;      /* Digest algorithm [enum pkey_hash_algo] */
+       uint32_t keyid;         /* IMA key identifier - not X509/PGP specific */
+       uint16_t sig_size;      /* signature size */
+       uint8_t sig[0];         /* signature payload */
+} __packed;
 
 /* integrity data associated with an inode */
 struct integrity_iint_cache {
-       struct rb_node rb_node; /* rooted in integrity_iint_tree */
+       struct rb_node rb_node; /* rooted in integrity_iint_tree */
        struct inode *inode;    /* back pointer to inode in question */
        u64 version;            /* track inode changes */
        unsigned long flags;
-       struct evm_ima_xattr_data ima_xattr;
        enum integrity_status ima_file_status:4;
        enum integrity_status ima_mmap_status:4;
        enum integrity_status ima_bprm_status:4;
        enum integrity_status ima_module_status:4;
        enum integrity_status evm_status:4;
+       struct ima_digest_data *ima_hash;
 };
 
 /* rbtree tree calls to lookup, insert, delete
@@ -89,7 +121,7 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode);
 #ifdef CONFIG_INTEGRITY_SIGNATURE
 
 int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
-                                       const char *digest, int digestlen);
+                           const char *digest, int digestlen);
 
 #else
 
@@ -105,12 +137,19 @@ static inline int integrity_digsig_verify(const unsigned int id,
 #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
 int asymmetric_verify(struct key *keyring, const char *sig,
                      int siglen, const char *data, int datalen);
+
+int integrity_init_keyring(const unsigned int id);
 #else
 static inline int asymmetric_verify(struct key *keyring, const char *sig,
                                    int siglen, const char *data, int datalen)
 {
        return -EOPNOTSUPP;
 }
+
+static inline int integrity_init_keyring(const unsigned int id)
+{
+       return 0;       /* stub for !CONFIG_INTEGRITY_ASYMMETRIC_KEYS: nothing to set up */
+}
 #endif
 
 #ifdef CONFIG_INTEGRITY_AUDIT
index a90d6d300dbd3b0b5849cae74af644e576e6dc45..a4f3f8c48d6e3f0aa8c5d7d21f900f425d517c71 100644 (file)
@@ -4,6 +4,7 @@
 
 config KEYS
        bool "Enable access key retention support"
+       select ASSOCIATIVE_ARRAY
        help
          This option provides support for retaining authentication tokens and
          access keys in the kernel.
@@ -19,6 +20,34 @@ config KEYS
 
          If you are unsure as to whether this is required, answer N.
 
+config PERSISTENT_KEYRINGS
+       bool "Enable register of persistent per-UID keyrings"
+       depends on KEYS
+       help
+         This option provides a register of persistent per-UID keyrings,
+         primarily aimed at Kerberos key storage.  The keyrings are persistent
+         in the sense that they stay around after all processes of that UID
+         have exited, not that they survive the machine being rebooted.
+
+         A particular keyring may be accessed by either the user whose keyring
+         it is or by a process with administrative privileges.  The active
+         LSMs get to rule on which admin-level processes get to access the
+         cache.
+
+         Keyrings are created and added into the register upon demand and get
+         removed if they expire (a default timeout is set upon creation).
+
+config BIG_KEYS
+       bool "Large payload keys"
+       depends on KEYS
+       depends on TMPFS
+       help
+         This option provides support for holding large keys within the kernel
+         (for example Kerberos ticket caches).  The data may be stored out to
+         swapspace by tmpfs.
+
+         If you are unsure as to whether this is required, answer N.
+
 config TRUSTED_KEYS
        tristate "TRUSTED KEYS"
        depends on KEYS && TCG_TPM
index 504aaa008388c1595716e40f0e62e5fcd44fba71..dfb3a7bededf548ac1eed24b094de858e7a07df6 100644 (file)
@@ -18,9 +18,11 @@ obj-y := \
 obj-$(CONFIG_KEYS_COMPAT) += compat.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSCTL) += sysctl.o
+obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
 
 #
 # Key types
 #
+obj-$(CONFIG_BIG_KEYS) += big_key.o
 obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
 obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
new file mode 100644 (file)
index 0000000..7f44c32
--- /dev/null
@@ -0,0 +1,207 @@
+/* Large capacity key type
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/file.h>
+#include <linux/shmem_fs.h>
+#include <linux/err.h>
+#include <keys/user-type.h>
+#include <keys/big_key-type.h>
+
+MODULE_LICENSE("GPL");
+
+/*
+ * If the data is under this limit, there's no point creating a shm file to
+ * hold it as the permanently resident metadata for the shmem fs will be at
+ * least as large as the data.
+ */
+#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry))
+
+/*
+ * Key type for "big_key" keys: an arbitrary string as the description and an
+ * arbitrary blob of data as the payload.  Matching reuses user_match().
+ */
+struct key_type key_type_big_key = {
+       .name                   = "big_key",
+       .def_lookup_type        = KEYRING_SEARCH_LOOKUP_DIRECT,
+       .instantiate            = big_key_instantiate,
+       .match                  = user_match,
+       .revoke                 = big_key_revoke,
+       .destroy                = big_key_destroy,
+       .describe               = big_key_describe,
+       .read                   = big_key_read,
+};
+
+/*
+ * Instantiate a big key
+ *
+ * Empty, missing or over-1MiB payloads are rejected with -EINVAL.  The payload
+ * length is recorded in key->type_data.x[1].  Payloads larger than
+ * BIG_KEY_FILE_THRESHOLD are written to a shmem file whose path is stored at
+ * key->payload.data2; smaller ones are copied into a kmalloc'd buffer held in
+ * key->payload.data.  Returns 0 on success or a negative error code.
+ */
+int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
+{
+       struct path *path = (struct path *)&key->payload.data2;
+       struct file *file;
+       ssize_t written;
+       size_t datalen = prep->datalen;
+       int ret;
+
+       ret = -EINVAL;
+       if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
+               goto error;
+
+       /* Set an arbitrary quota */
+       ret = key_payload_reserve(key, 16);
+       if (ret < 0)
+               goto error;
+
+       key->type_data.x[1] = datalen;
+
+       if (datalen > BIG_KEY_FILE_THRESHOLD) {
+               /* Create a shmem file to store the data in.  This will permit the data
+                * to be swapped out if needed.
+                *
+                * TODO: Encrypt the stored data with a temporary key.
+                */
+               file = shmem_file_setup("", datalen, 0);
+               if (IS_ERR(file)) {
+                       ret = PTR_ERR(file);
+                       goto err_quota;
+               }
+
+               written = kernel_write(file, prep->data, prep->datalen, 0);
+               if (written != datalen) {
+                       /* pass a write error through; a short write becomes -ENOMEM */
+                       ret = written;
+                       if (written >= 0)
+                               ret = -ENOMEM;
+                       goto err_fput;
+               }
+
+               /* Pin the mount and dentry to the key so that we can open it again
+                * later
+                */
+               *path = file->f_path;
+               path_get(path);
+               fput(file);
+       } else {
+               /* Just store the data in a buffer */
+               void *data = kmalloc(datalen, GFP_KERNEL);
+               if (!data) {
+                       ret = -ENOMEM;
+                       goto err_quota;
+               }
+
+               key->payload.data = memcpy(data, prep->data, prep->datalen);
+       }
+       return 0;
+
+err_fput:
+       fput(file);
+err_quota:
+       key_payload_reserve(key, 0);    /* reset the reservation made above to 0 */
+error:
+       return ret;
+}
+
+/*
+ * Revoke a big key: reset its quota reservation to 0 and, if the key is
+ * instantiated and its data is held in a shmem file, truncate that file
+ * - called with the key sem write-locked
+ */
+void big_key_revoke(struct key *key)
+{
+       struct path *path = (struct path *)&key->payload.data2;
+
+       /* clear the quota */
+       key_payload_reserve(key, 0);
+       if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD)
+               vfs_truncate(path, 0);
+}
+
+/*
+ * Release the payload storage still attached to a big_key key that is being
+ * destroyed: the buffer for small payloads, the pinned path for large ones
+ */
+void big_key_destroy(struct key *key)
+{
+       struct path *path;
+
+       if (key->type_data.x[1] <= BIG_KEY_FILE_THRESHOLD) {
+               /* payload lives in a plain buffer */
+               kfree(key->payload.data);
+               key->payload.data = NULL;
+               return;
+       }
+
+       /* payload lives in a file: drop the path reference and clear it */
+       path = (struct path *)&key->payload.data2;
+       path_put(path);
+       path->mnt = NULL;
+       path->dentry = NULL;
+}
+
+/*
+ * Emit the description of a big_key key and, once it is instantiated, the
+ * payload length and whether it is held in a file or a buffer
+ */
+void big_key_describe(const struct key *key, struct seq_file *m)
+{
+       unsigned long size = key->type_data.x[1];
+       const char *storage;
+
+       seq_puts(m, key->description);
+
+       /* nothing more to report until the key has a payload */
+       if (!key_is_instantiated(key))
+               return;
+
+       storage = size > BIG_KEY_FILE_THRESHOLD ? "file" : "buff";
+       seq_printf(m, ": %lu [%s]", size, storage);
+}
+
+/*
+ * read the key data
+ * - the key's semaphore is read-locked
+ *
+ * If no buffer is supplied, or it is shorter than the stored payload, nothing
+ * is copied and the payload length is returned.  Otherwise the whole payload
+ * is copied to the user buffer, from the shmem file or from the in-memory
+ * buffer depending on its size, and its length is returned.  A short file
+ * read gives -EIO and a failed copy gives -EFAULT; errors from opening or
+ * reading the file are passed through.
+ */
+long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
+{
+       unsigned long datalen = key->type_data.x[1];
+       long ret;
+
+       /* report the size needed rather than doing a partial copy */
+       if (!buffer || buflen < datalen)
+               return datalen;
+
+       if (datalen > BIG_KEY_FILE_THRESHOLD) {
+               struct path *path = (struct path *)&key->payload.data2;
+               struct file *file;
+               loff_t pos;
+
+               /* reopen the file whose path is stored in the key */
+               file = dentry_open(path, O_RDONLY, current_cred());
+               if (IS_ERR(file))
+                       return PTR_ERR(file);
+
+               pos = 0;
+               ret = vfs_read(file, buffer, datalen, &pos);
+               fput(file);
+               if (ret >= 0 && ret != datalen)
+                       ret = -EIO;
+       } else {
+               ret = datalen;
+               if (copy_to_user(buffer, key->payload.data, datalen) != 0)
+                       ret = -EFAULT;
+       }
+
+       return ret;
+}
+
+/*
+ * Module initialisation: register the big_key key type and return the
+ * result of that registration
+ */
+static int __init big_key_init(void)
+{
+       return register_key_type(&key_type_big_key);
+}
+
+/*
+ * Module exit: unregister the big_key key type
+ */
+static void __exit big_key_cleanup(void)
+{
+       unregister_key_type(&key_type_big_key);
+}
+
+module_init(big_key_init);
+module_exit(big_key_cleanup);
index d65fa7fa29ba1a53b1ef4fb6d76c7aeafb7da65a..bbd32c729dbb4e019d1461116b84c25107e35ab8 100644 (file)
@@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option,
        case KEYCTL_INVALIDATE:
                return keyctl_invalidate_key(arg2);
 
+       case KEYCTL_GET_PERSISTENT:
+               return keyctl_get_persistent(arg2, arg3);
+
        default:
                return -EOPNOTSUPP;
        }
index d67c97bb10256d5dc5a9b74b3b8aaa37022f96b1..d3222b6d7d5979460ff63940066433e414b22ec4 100644 (file)
@@ -130,50 +130,6 @@ void key_gc_keytype(struct key_type *ktype)
        kleave("");
 }
 
-/*
- * Garbage collect pointers from a keyring.
- *
- * Not called with any locks held.  The keyring's key struct will not be
- * deallocated under us as only our caller may deallocate it.
- */
-static void key_gc_keyring(struct key *keyring, time_t limit)
-{
-       struct keyring_list *klist;
-       int loop;
-
-       kenter("%x", key_serial(keyring));
-
-       if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
-                             (1 << KEY_FLAG_REVOKED)))
-               goto dont_gc;
-
-       /* scan the keyring looking for dead keys */
-       rcu_read_lock();
-       klist = rcu_dereference(keyring->payload.subscriptions);
-       if (!klist)
-               goto unlock_dont_gc;
-
-       loop = klist->nkeys;
-       smp_rmb();
-       for (loop--; loop >= 0; loop--) {
-               struct key *key = rcu_dereference(klist->keys[loop]);
-               if (key_is_dead(key, limit))
-                       goto do_gc;
-       }
-
-unlock_dont_gc:
-       rcu_read_unlock();
-dont_gc:
-       kleave(" [no gc]");
-       return;
-
-do_gc:
-       rcu_read_unlock();
-
-       keyring_gc(keyring, limit);
-       kleave(" [gc]");
-}
-
 /*
  * Garbage collect a list of unreferenced, detached keys
  */
@@ -392,8 +348,7 @@ found_unreferenced_key:
         */
 found_keyring:
        spin_unlock(&key_serial_lock);
-       kdebug("scan keyring %d", key->serial);
-       key_gc_keyring(key, limit);
+       keyring_gc(key, limit);
        goto maybe_resched;
 
        /* We found a dead key that is still referenced.  Reset its type and
index d4f1468b9b50f46cd7d739544902a77a3ff40384..80b2aac4f50ceda614d03c815f7638aa88a0c933 100644 (file)
@@ -89,42 +89,53 @@ extern struct key_type *key_type_lookup(const char *type);
 extern void key_type_put(struct key_type *ktype);
 
 extern int __key_link_begin(struct key *keyring,
-                           const struct key_type *type,
-                           const char *description,
-                           unsigned long *_prealloc);
+                           const struct keyring_index_key *index_key,
+                           struct assoc_array_edit **_edit);
 extern int __key_link_check_live_key(struct key *keyring, struct key *key);
-extern void __key_link(struct key *keyring, struct key *key,
-                      unsigned long *_prealloc);
+extern void __key_link(struct key *key, struct assoc_array_edit **_edit);
 extern void __key_link_end(struct key *keyring,
-                          struct key_type *type,
-                          unsigned long prealloc);
+                          const struct keyring_index_key *index_key,
+                          struct assoc_array_edit *edit);
 
-extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-                                     const struct key_type *type,
-                                     const char *description,
-                                     key_perm_t perm);
+extern key_ref_t find_key_to_update(key_ref_t keyring_ref,
+                                   const struct keyring_index_key *index_key);
 
 extern struct key *keyring_search_instkey(struct key *keyring,
                                          key_serial_t target_id);
 
+extern int iterate_over_keyring(const struct key *keyring,
+                               int (*func)(const struct key *key, void *data),
+                               void *data);
+
 typedef int (*key_match_func_t)(const struct key *, const void *);
 
+/*
+ * Parameters and working state for a keyring search.  A pointer to this is
+ * what keyring_search_aux(), search_my_process_keyrings() and
+ * search_process_keyrings() take in place of separate arguments.
+ */
+struct keyring_search_context {
+       struct keyring_index_key index_key;
+       const struct cred       *cred;
+       key_match_func_t        match;
+       const void              *match_data;
+       unsigned                flags;
+#define KEYRING_SEARCH_LOOKUP_TYPE     0x0001  /* [as type->def_lookup_type] */
+#define KEYRING_SEARCH_NO_STATE_CHECK  0x0002  /* Skip state checks */
+#define KEYRING_SEARCH_DO_STATE_CHECK  0x0004  /* Override NO_STATE_CHECK */
+#define KEYRING_SEARCH_NO_UPDATE_TIME  0x0008  /* Don't update times */
+#define KEYRING_SEARCH_NO_CHECK_PERM   0x0010  /* Don't check permissions */
+#define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020  /* Give an error on excessive depth */
+
+       int (*iterator)(const void *object, void *iterator_data);
+
+       /* Internal stuff */
+       int                     skipped_ret;
+       bool                    possessed;
+       key_ref_t               result;
+       struct timespec         now;
+};
+
 extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-                                   const struct cred *cred,
-                                   struct key_type *type,
-                                   const void *description,
-                                   key_match_func_t match,
-                                   bool no_state_check);
-
-extern key_ref_t search_my_process_keyrings(struct key_type *type,
-                                           const void *description,
-                                           key_match_func_t match,
-                                           bool no_state_check,
-                                           const struct cred *cred);
-extern key_ref_t search_process_keyrings(struct key_type *type,
-                                        const void *description,
-                                        key_match_func_t match,
-                                        const struct cred *cred);
+                                   struct keyring_search_context *ctx);
+
+extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
+extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
 
 extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
 
@@ -202,7 +213,7 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id);
 /*
  * Determine whether a key is dead.
  */
-static inline bool key_is_dead(struct key *key, time_t limit)
+static inline bool key_is_dead(const struct key *key, time_t limit)
 {
        return
                key->flags & ((1 << KEY_FLAG_DEAD) |
@@ -244,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t);
 extern long keyctl_instantiate_key_common(key_serial_t,
                                          const struct iovec *,
                                          unsigned, size_t, key_serial_t);
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+extern long keyctl_get_persistent(uid_t, key_serial_t);
+extern unsigned persistent_keyring_expiry;
+#else
+static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring)
+{
+       return -EOPNOTSUPP;     /* stub used when CONFIG_PERSISTENT_KEYRINGS is not set */
+}
+#endif
 
 /*
  * Debugging key validation
index 8fb7c7bd465769cb5dca49e6d6f1ad011c75de63..55d110f0acedc96d17bcc5f5903aaa743ed6cb32 100644 (file)
@@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
                }
        }
 
-       desclen = strlen(desc) + 1;
-       quotalen = desclen + type->def_datalen;
+       desclen = strlen(desc);
+       quotalen = desclen + 1 + type->def_datalen;
 
        /* get hold of the key tracking for this user */
        user = key_user_lookup(uid);
@@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
                goto no_memory_2;
 
        if (desc) {
-               key->description = kmemdup(desc, desclen, GFP_KERNEL);
+               key->index_key.desc_len = desclen;
+               key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
                if (!key->description)
                        goto no_memory_3;
        }
@@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
        atomic_set(&key->usage, 1);
        init_rwsem(&key->sem);
        lockdep_set_class(&key->sem, &type->lock_class);
-       key->type = type;
+       key->index_key.type = type;
        key->user = user;
        key->quotalen = quotalen;
        key->datalen = type->def_datalen;
@@ -299,6 +300,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 
        if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
                key->flags |= 1 << KEY_FLAG_IN_QUOTA;
+       if (flags & KEY_ALLOC_TRUSTED)
+               key->flags |= 1 << KEY_FLAG_TRUSTED;
 
        memset(&key->type_data, 0, sizeof(key->type_data));
 
@@ -408,7 +411,7 @@ static int __key_instantiate_and_link(struct key *key,
                                      struct key_preparsed_payload *prep,
                                      struct key *keyring,
                                      struct key *authkey,
-                                     unsigned long *_prealloc)
+                                     struct assoc_array_edit **_edit)
 {
        int ret, awaken;
 
@@ -435,7 +438,7 @@ static int __key_instantiate_and_link(struct key *key,
 
                        /* and link it into the destination keyring */
                        if (keyring)
-                               __key_link(keyring, key, _prealloc);
+                               __key_link(key, _edit);
 
                        /* disable the authorisation key */
                        if (authkey)
@@ -475,7 +478,7 @@ int key_instantiate_and_link(struct key *key,
                             struct key *authkey)
 {
        struct key_preparsed_payload prep;
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
        int ret;
 
        memset(&prep, 0, sizeof(prep));
@@ -489,17 +492,15 @@ int key_instantiate_and_link(struct key *key,
        }
 
        if (keyring) {
-               ret = __key_link_begin(keyring, key->type, key->description,
-                                      &prealloc);
+               ret = __key_link_begin(keyring, &key->index_key, &edit);
                if (ret < 0)
                        goto error_free_preparse;
        }
 
-       ret = __key_instantiate_and_link(key, &prep, keyring, authkey,
-                                        &prealloc);
+       ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit);
 
        if (keyring)
-               __key_link_end(keyring, key->type, prealloc);
+               __key_link_end(keyring, &key->index_key, edit);
 
 error_free_preparse:
        if (key->type->preparse)
@@ -537,7 +538,7 @@ int key_reject_and_link(struct key *key,
                        struct key *keyring,
                        struct key *authkey)
 {
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
        struct timespec now;
        int ret, awaken, link_ret = 0;
 
@@ -548,8 +549,7 @@ int key_reject_and_link(struct key *key,
        ret = -EBUSY;
 
        if (keyring)
-               link_ret = __key_link_begin(keyring, key->type,
-                                           key->description, &prealloc);
+               link_ret = __key_link_begin(keyring, &key->index_key, &edit);
 
        mutex_lock(&key_construction_mutex);
 
@@ -557,9 +557,10 @@ int key_reject_and_link(struct key *key,
        if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) {
                /* mark the key as being negatively instantiated */
                atomic_inc(&key->user->nikeys);
+               key->type_data.reject_error = -error;
+               smp_wmb();
                set_bit(KEY_FLAG_NEGATIVE, &key->flags);
                set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
-               key->type_data.reject_error = -error;
                now = current_kernel_time();
                key->expiry = now.tv_sec + timeout;
                key_schedule_gc(key->expiry + key_gc_delay);
@@ -571,7 +572,7 @@ int key_reject_and_link(struct key *key,
 
                /* and link it into the destination keyring */
                if (keyring && link_ret == 0)
-                       __key_link(keyring, key, &prealloc);
+                       __key_link(key, &edit);
 
                /* disable the authorisation key */
                if (authkey)
@@ -581,7 +582,7 @@ int key_reject_and_link(struct key *key,
        mutex_unlock(&key_construction_mutex);
 
        if (keyring)
-               __key_link_end(keyring, key->type, prealloc);
+               __key_link_end(keyring, &key->index_key, edit);
 
        /* wake up anyone waiting for a key to be constructed */
        if (awaken)
@@ -645,7 +646,7 @@ found:
        /* this races with key_put(), but that doesn't matter since key_put()
         * doesn't actually change the key
         */
-       atomic_inc(&key->usage);
+       __key_get(key);
 
 error:
        spin_unlock(&key_serial_lock);
@@ -780,25 +781,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
                               key_perm_t perm,
                               unsigned long flags)
 {
-       unsigned long prealloc;
+       struct keyring_index_key index_key = {
+               .description    = description,
+       };
        struct key_preparsed_payload prep;
+       struct assoc_array_edit *edit;
        const struct cred *cred = current_cred();
-       struct key_type *ktype;
        struct key *keyring, *key = NULL;
        key_ref_t key_ref;
        int ret;
 
        /* look up the key type to see if it's one of the registered kernel
         * types */
-       ktype = key_type_lookup(type);
-       if (IS_ERR(ktype)) {
+       index_key.type = key_type_lookup(type);
+       if (IS_ERR(index_key.type)) {
                key_ref = ERR_PTR(-ENODEV);
                goto error;
        }
 
        key_ref = ERR_PTR(-EINVAL);
-       if (!ktype->match || !ktype->instantiate ||
-           (!description && !ktype->preparse))
+       if (!index_key.type->match || !index_key.type->instantiate ||
+           (!index_key.description && !index_key.type->preparse))
                goto error_put_type;
 
        keyring = key_ref_to_ptr(keyring_ref);
@@ -812,21 +815,28 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
        memset(&prep, 0, sizeof(prep));
        prep.data = payload;
        prep.datalen = plen;
-       prep.quotalen = ktype->def_datalen;
-       if (ktype->preparse) {
-               ret = ktype->preparse(&prep);
+       prep.quotalen = index_key.type->def_datalen;
+       prep.trusted = flags & KEY_ALLOC_TRUSTED;
+       if (index_key.type->preparse) {
+               ret = index_key.type->preparse(&prep);
                if (ret < 0) {
                        key_ref = ERR_PTR(ret);
                        goto error_put_type;
                }
-               if (!description)
-                       description = prep.description;
+               if (!index_key.description)
+                       index_key.description = prep.description;
                key_ref = ERR_PTR(-EINVAL);
-               if (!description)
+               if (!index_key.description)
                        goto error_free_prep;
        }
+       index_key.desc_len = strlen(index_key.description);
+
+       key_ref = ERR_PTR(-EPERM);
+       if (!prep.trusted && test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags))
+               goto error_free_prep;
+       flags |= prep.trusted ? KEY_ALLOC_TRUSTED : 0;
 
-       ret = __key_link_begin(keyring, ktype, description, &prealloc);
+       ret = __key_link_begin(keyring, &index_key, &edit);
        if (ret < 0) {
                key_ref = ERR_PTR(ret);
                goto error_free_prep;
@@ -844,10 +854,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
         * key of the same type and description in the destination keyring and
         * update that instead if possible
         */
-       if (ktype->update) {
-               key_ref = __keyring_search_one(keyring_ref, ktype, description,
-                                              0);
-               if (!IS_ERR(key_ref))
+       if (index_key.type->update) {
+               key_ref = find_key_to_update(keyring_ref, &index_key);
+               if (key_ref)
                        goto found_matching_key;
        }
 
@@ -856,23 +865,24 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
                perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
                perm |= KEY_USR_VIEW;
 
-               if (ktype->read)
+               if (index_key.type->read)
                        perm |= KEY_POS_READ;
 
-               if (ktype == &key_type_keyring || ktype->update)
+               if (index_key.type == &key_type_keyring ||
+                   index_key.type->update)
                        perm |= KEY_POS_WRITE;
        }
 
        /* allocate a new key */
-       key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred,
-                       perm, flags);
+       key = key_alloc(index_key.type, index_key.description,
+                       cred->fsuid, cred->fsgid, cred, perm, flags);
        if (IS_ERR(key)) {
                key_ref = ERR_CAST(key);
                goto error_link_end;
        }
 
        /* instantiate it and link it into the target keyring */
-       ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc);
+       ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit);
        if (ret < 0) {
                key_put(key);
                key_ref = ERR_PTR(ret);
@@ -882,12 +892,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
        key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
 
 error_link_end:
-       __key_link_end(keyring, ktype, prealloc);
+       __key_link_end(keyring, &index_key, edit);
 error_free_prep:
-       if (ktype->preparse)
-               ktype->free_preparse(&prep);
+       if (index_key.type->preparse)
+               index_key.type->free_preparse(&prep);
 error_put_type:
-       key_type_put(ktype);
+       key_type_put(index_key.type);
 error:
        return key_ref;
 
@@ -895,7 +905,7 @@ error:
        /* we found a matching key, so we're going to try to update it
         * - we can drop the locks first as we have the key pinned
         */
-       __key_link_end(keyring, ktype, prealloc);
+       __key_link_end(keyring, &index_key, edit);
 
        key_ref = __key_update(key_ref, &prep);
        goto error_free_prep;
index 33cfd27b4de29650ae6ad0e1eb45646714a00f27..cee72ce642221e816968cb81069407fe01edb138 100644 (file)
@@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
        case KEYCTL_INVALIDATE:
                return keyctl_invalidate_key((key_serial_t) arg2);
 
+       case KEYCTL_GET_PERSISTENT:
+               return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3);
+
        default:
                return -EOPNOTSUPP;
        }
index 6ece7f2e5707f45c2736ca4a05504c2dd391ea00..69f0cb7bab7e873f8d8997d71db7c42430f72ce6 100644 (file)
@@ -1,6 +1,6 @@
 /* Keyring handling
  *
- * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <keys/keyring-type.h>
+#include <keys/user-type.h>
+#include <linux/assoc_array_priv.h>
 #include <linux/uaccess.h>
 #include "internal.h"
 
-#define rcu_dereference_locked_keyring(keyring)                                \
-       (rcu_dereference_protected(                                     \
-               (keyring)->payload.subscriptions,                       \
-               rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define rcu_deref_link_locked(klist, index, keyring)                   \
-       (rcu_dereference_protected(                                     \
-               (klist)->keys[index],                                   \
-               rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define MAX_KEYRING_LINKS                                              \
-       min_t(size_t, USHRT_MAX - 1,                                    \
-             ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *)))
-
-#define KEY_LINK_FIXQUOTA 1UL
-
 /*
  * When plumbing the depths of the key tree, this sets a hard limit
  * set on how deep we're willing to go.
  */
 #define KEYRING_NAME_HASH_SIZE (1 << 5)
 
+/*
+ * We mark pointers we pass to the associative array with bit 1 set if
+ * they're keyrings and clear otherwise.
+ */
+#define KEYRING_PTR_SUBTYPE    0x2UL
+
+static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x)
+{
+       return (unsigned long)x & KEYRING_PTR_SUBTYPE; /* true iff the subtype tag bit is set in the pointer value */
+}
+static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x)
+{
+       void *object = assoc_array_ptr_to_leaf(x);
+       return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE); /* mask off the keyring tag bit to recover the key address */
+}
+static inline void *keyring_key_to_ptr(struct key *key)
+{
+       unsigned long tagged = (unsigned long)key;
+
+       /* Keyrings get the subtype bit set in the stored pointer; every other
+        * key type is stored as its plain address.
+        */
+       if (key->type == &key_type_keyring)
+               tagged |= KEYRING_PTR_SUBTYPE;
+       return (void *)tagged;
+}
+
 static struct list_head        keyring_name_hash[KEYRING_NAME_HASH_SIZE];
 static DEFINE_RWLOCK(keyring_name_lock);
 
@@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc)
  */
 static int keyring_instantiate(struct key *keyring,
                               struct key_preparsed_payload *prep);
-static int keyring_match(const struct key *keyring, const void *criterion);
 static void keyring_revoke(struct key *keyring);
 static void keyring_destroy(struct key *keyring);
 static void keyring_describe(const struct key *keyring, struct seq_file *m);
@@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring,
 
 struct key_type key_type_keyring = {
        .name           = "keyring",
-       .def_datalen    = sizeof(struct keyring_list),
+       .def_datalen    = 0,
        .instantiate    = keyring_instantiate,
-       .match          = keyring_match,
+       .match          = user_match,
        .revoke         = keyring_revoke,
        .destroy        = keyring_destroy,
        .describe       = keyring_describe,
@@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring,
 
        ret = -EINVAL;
        if (prep->datalen == 0) {
+               assoc_array_init(&keyring->keys);
                /* make the keyring available by name if it has one */
                keyring_publish_name(keyring);
                ret = 0;
@@ -136,14 +144,225 @@ static int keyring_instantiate(struct key *keyring,
 }
 
 /*
- * Match keyrings on their name
+ * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit.  Ideally we'd
+ * fold the carry back too, but that requires inline asm.
+ */
+static u64 mult_64x32_and_fold(u64 x, u32 y)
+{
+       /* Partial products of the upper and lower 32-bit halves of x with y */
+       u64 upper = (u64)(u32)(x >> 32) * y;
+       u64 lower = (u64)(u32)x * y;
+       /* The low word of the upper product belongs in bits 32-63; its high
+        * word would be bits 64-95, so it is folded back into the bottom.
+        */
+       u64 shifted = (u64)(u32)upper << 32;
+       u32 folded = (u32)(upper >> 32);
+
+       return lower + shifted + folded;
+}
+
+/*
+ * Hash a key type and description.
+ */
+static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key)
+{
+       const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
+       /* Mask covering one whole node-index segment of the hash.  This is the
+        * same mask search_nested_keyrings() applies when it checks which root
+        * slot a shortcut leads to, so it is the segment that has to be zero
+        * for keyrings and non-zero for everything else.
+        */
+       const unsigned long fan_mask = ASSOC_ARRAY_FAN_MASK;
+       const char *description = index_key->description;
+       unsigned long hash, type;
+       u32 piece;
+       u64 acc;
+       int n, desc_len = index_key->desc_len;
+
+       type = (unsigned long)index_key->type;
+
+       /* Seed the accumulator from the type pointer and description length */
+       acc = mult_64x32_and_fold(type, desc_len + 13);
+       acc = mult_64x32_and_fold(acc, 9207);
+       /* Mix in the description, up to four bytes per round */
+       for (;;) {
+               n = desc_len;
+               if (n <= 0)
+                       break;
+               if (n > 4)
+                       n = 4;
+               piece = 0;
+               memcpy(&piece, description, n);
+               description += n;
+               desc_len -= n;
+               acc = mult_64x32_and_fold(acc, piece);
+               acc = mult_64x32_and_fold(acc, 9207);
+       }
+
+       /* Fold the hash down to 32 bits if need be. */
+       hash = acc;
+       if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32)
+               hash ^= acc >> 32;
+
+       /* Squidge all the keyrings into a separate part of the tree to
+        * ordinary keys by making sure the lowest level segment in the hash is
+        * zero for keyrings and non-zero otherwise.
+        */
+       if (index_key->type != &key_type_keyring && (hash & fan_mask) == 0)
+               return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
+       if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0)
+               return (hash + (hash << level_shift)) & ~fan_mask;
+       return hash;
+}
+
+/*
+ * Build the next index key chunk.
+ *
+ * On 32-bit systems the index key is laid out as:
+ *
+ *     0       4       5       9...
+ *     hash    desclen typeptr desc[]
+ *
+ * On 64-bit systems:
+ *
+ *     0       8       9       17...
+ *     hash    desclen typeptr desc[]
+ *
+ * We return it one word-sized chunk at a time.
  */
-static int keyring_match(const struct key *keyring, const void *description)
+static unsigned long keyring_get_key_chunk(const void *data, int level)
+{
+       const struct keyring_index_key *index_key = data;
+       unsigned long chunk = 0;
+       long offset = 0;
+       int desc_len = index_key->desc_len, n = sizeof(chunk);
+
+       level /= ASSOC_ARRAY_KEY_CHUNK_SIZE; /* turn level into a chunk number (0, 1, 2, ...) */
+       switch (level) {
+       case 0:
+               return hash_key_type_and_desc(index_key);
+       case 1:
+               return ((unsigned long)index_key->type << 8) | desc_len; /* desc_len in the low byte, type pointer above it */
+       case 2:
+               if (desc_len == 0)
+                       return (u8)((unsigned long)index_key->type >>
+                                   (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); /* the type byte that the << 8 in chunk 1 pushed out (assuming word-sized chunks) */
+               n--; /* one byte of this chunk is taken by that leftover type byte */
+               offset = 1; /* fall through - with this the arithmetic below starts at description[0] */
+       default:
+               offset += sizeof(chunk) - 1;
+               offset += (level - 3) * sizeof(chunk);
+               if (offset >= desc_len)
+                       return 0;
+               desc_len -= offset;
+               if (desc_len > n)
+                       desc_len = n;
+               offset += desc_len;
+               do {
+                       chunk <<= 8;
+                       chunk |= ((u8*)index_key->description)[--offset]; /* walk backwards so the earliest byte ends up least significant */
+               } while (--desc_len > 0);
+
+               if (level == 2) {
+                       chunk <<= 8;
+                       chunk |= (u8)((unsigned long)index_key->type >>
+                                     (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
+               }
+               return chunk;
+       }
+}
+
+static unsigned long keyring_get_object_key_chunk(const void *object, int level)
+{
+       const struct key *key = keyring_ptr_to_key(object); /* object is a stored, possibly tagged, pointer */
+       return keyring_get_key_chunk(&key->index_key, level); /* same chunking as for a caller-supplied index key */
+}
+
+static bool keyring_compare_object(const void *object, const void *data)
 {
-       return keyring->description &&
-               strcmp(keyring->description, description) == 0;
+       const struct keyring_index_key *index_key = data; /* the index key being looked for */
+       const struct key *key = keyring_ptr_to_key(object); /* the stored key, with any keyring tag removed */
+
+       return key->index_key.type == index_key->type && /* match needs same type, same length and identical description bytes */
+               key->index_key.desc_len == index_key->desc_len &&
+               memcmp(key->index_key.description, index_key->description,
+                      index_key->desc_len) == 0;
 }
 
+/*
+ * Compare the index keys of a pair of objects and determine the bit position
+ * at which they differ - if they differ.
+ */
+static int keyring_diff_objects(const void *_a, const void *_b)
+{
+       const struct key *key_a = keyring_ptr_to_key(_a);
+       const struct key *key_b = keyring_ptr_to_key(_b);
+       const struct keyring_index_key *a = &key_a->index_key;
+       const struct keyring_index_key *b = &key_b->index_key;
+       unsigned long seg_a, seg_b;
+       int level, i;
+
+       /* Returns -1 if the two index keys are identical, otherwise the bit
+        * position of the first difference.  'level' counts bytes of index key
+        * already found equal; it is converted to bits on the way out.
+        */
+       level = 0;
+       seg_a = hash_key_type_and_desc(a);
+       seg_b = hash_key_type_and_desc(b);
+       if ((seg_a ^ seg_b) != 0)
+               goto differ;
+
+       /* The number of bits contributed by the hash is controlled by a
+        * constant in the assoc_array headers.  Everything else thereafter we
+        * can deal with as being machine word-size dependent.
+        */
+       level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;
+       seg_a = a->desc_len;
+       seg_b = b->desc_len;
+       if ((seg_a ^ seg_b) != 0)
+               goto differ;
+
+       /* The next bit may not work on big endian */
+       level++;
+       seg_a = (unsigned long)a->type;
+       seg_b = (unsigned long)b->type;
+       if ((seg_a ^ seg_b) != 0)
+               goto differ;
+
+       level += sizeof(unsigned long);
+       if (a->desc_len == 0)
+               goto same;
+
+       /* Compare the descriptions strictly a byte at a time.  This matches
+        * the byte order in which keyring_get_key_chunk() lays the description
+        * out, never reads beyond desc_len, and needs no particular pointer
+        * alignment.  The lengths are already known to be equal here.
+        */
+       for (i = 0; i < a->desc_len; i++) {
+               seg_a = *(const unsigned char *)(a->description + i);
+               seg_b = *(const unsigned char *)(b->description + i);
+               if ((seg_a ^ seg_b) != 0)
+                       goto differ_plus_i;
+       }
+
+same:
+       return -1;
+
+differ_plus_i:
+       level += i;
+differ:
+       i = level * 8 + __ffs(seg_a ^ seg_b);
+       return i;
+}
+
+/*
+ * Free an object after stripping the keyring flag off of the pointer.
+ */
+static void keyring_free_object(void *object)
+{
+       key_put(keyring_ptr_to_key(object)); /* untag the pointer first; key_put() presumably drops the reference the array held - confirm */
+}
+
+/*
+ * Operations for keyring management by the index-tree routines.
+ */
+static const struct assoc_array_ops keyring_assoc_array_ops = {
+       .get_key_chunk          = keyring_get_key_chunk, /* chunk of a caller-supplied index key */
+       .get_object_key_chunk   = keyring_get_object_key_chunk, /* chunk of a stored key's own index key */
+       .compare_object         = keyring_compare_object, /* exact type + description match */
+       .diff_objects           = keyring_diff_objects, /* first differing bit between two stored keys */
+       .free_object            = keyring_free_object, /* untag and key_put() */
+};
+
 /*
  * Clean up a keyring when it is destroyed.  Unpublish its name if it had one
  * and dispose of its data.
@@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description)
  */
 static void keyring_destroy(struct key *keyring)
 {
-       struct keyring_list *klist;
-       int loop;
-
        if (keyring->description) {
                write_lock(&keyring_name_lock);
 
@@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring)
                write_unlock(&keyring_name_lock);
        }
 
-       klist = rcu_access_pointer(keyring->payload.subscriptions);
-       if (klist) {
-               for (loop = klist->nkeys - 1; loop >= 0; loop--)
-                       key_put(rcu_access_pointer(klist->keys[loop]));
-               kfree(klist);
-       }
+       assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops);
 }
 
 /*
@@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring)
  */
 static void keyring_describe(const struct key *keyring, struct seq_file *m)
 {
-       struct keyring_list *klist;
-
        if (keyring->description)
                seq_puts(m, keyring->description);
        else
                seq_puts(m, "[anon]"); /* keyring has no description */
 
        if (key_is_instantiated(keyring)) {
-               rcu_read_lock();
-               klist = rcu_dereference(keyring->payload.subscriptions);
-               if (klist)
-                       seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys);
+               if (keyring->keys.nr_leaves_on_tree != 0)
+                       seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); /* leaf count only; the old nkeys/maxkeys pair is gone */
                else
                        seq_puts(m, ": empty");
-               rcu_read_unlock();
        }
 }
 
+struct keyring_read_iterator_context {
+       size_t                  qty; /* byte count at which the iterator stops writing */
+       size_t                  count; /* bytes of serial numbers written so far */
+       key_serial_t __user     *buffer; /* next user-space slot to receive a serial */
+};
+
+static int keyring_read_iterator(const void *object, void *data)
+{
+       struct keyring_read_iterator_context *ctx = data;
+       const struct key *key = keyring_ptr_to_key(object);
+       int ret;
+
+       kenter("{%s,%d},,{%zu/%zu}",
+              key->type->name, key->serial, ctx->count, ctx->qty);
+
+       if (ctx->count >= ctx->qty)
+               return 1; /* byte limit reached: positive return, nothing more is written */
+
+       ret = put_user(key->serial, ctx->buffer);
+       if (ret < 0)
+               return ret; /* hand the put_user() failure back to the caller */
+       ctx->buffer++;
+       ctx->count += sizeof(key->serial); /* count is kept in bytes, not entries */
+       return 0;
+}
+
 /*
  * Read a list of key IDs from the keyring's contents in binary form
  *
- * The keyring's semaphore is read-locked by the caller.
+ * The keyring's semaphore is read-locked by the caller.  This prevents someone
+ * from modifying it under us - which could cause us to read key IDs multiple
+ * times.
  */
 static long keyring_read(const struct key *keyring,
                         char __user *buffer, size_t buflen)
 {
-       struct keyring_list *klist;
-       struct key *key;
-       size_t qty, tmp;
-       int loop, ret;
+       struct keyring_read_iterator_context ctx;
+       unsigned long nr_keys;
+       int ret;
 
-       ret = 0;
-       klist = rcu_dereference_locked_keyring(keyring);
-       if (klist) {
-               /* calculate how much data we could return */
-               qty = klist->nkeys * sizeof(key_serial_t);
-
-               if (buffer && buflen > 0) {
-                       if (buflen > qty)
-                               buflen = qty;
-
-                       /* copy the IDs of the subscribed keys into the
-                        * buffer */
-                       ret = -EFAULT;
-
-                       for (loop = 0; loop < klist->nkeys; loop++) {
-                               key = rcu_deref_link_locked(klist, loop,
-                                                           keyring);
-
-                               tmp = sizeof(key_serial_t);
-                               if (tmp > buflen)
-                                       tmp = buflen;
-
-                               if (copy_to_user(buffer,
-                                                &key->serial,
-                                                tmp) != 0)
-                                       goto error;
-
-                               buflen -= tmp;
-                               if (buflen == 0)
-                                       break;
-                               buffer += tmp;
-                       }
-               }
+       kenter("{%d},,%zu", key_serial(keyring), buflen);
+
+       if (buflen & (sizeof(key_serial_t) - 1))
+               return -EINVAL;
+
+       nr_keys = keyring->keys.nr_leaves_on_tree;
+       if (nr_keys == 0)
+               return 0;
 
-               ret = qty;
+       /* Calculate how much data we could return */
+       ctx.qty = nr_keys * sizeof(key_serial_t);
+
+       if (!buffer || !buflen)
+               return ctx.qty;
+
+       if (buflen > ctx.qty)
+               ctx.qty = buflen;
+
+       /* Copy the IDs of the subscribed keys into the buffer */
+       ctx.buffer = (key_serial_t __user *)buffer;
+       ctx.count = 0;
+       ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
+       if (ret < 0) {
+               kleave(" = %d [iterate]", ret);
+               return ret;
        }
 
-error:
-       return ret;
+       kleave(" = %zu [ok]", ctx.count);
+       return ctx.count;
 }
 
 /*
@@ -277,227 +500,361 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
 }
 EXPORT_SYMBOL(keyring_alloc);
 
-/**
- * keyring_search_aux - Search a keyring tree for a key matching some criteria
- * @keyring_ref: A pointer to the keyring with possession indicator.
- * @cred: The credentials to use for permissions checks.
- * @type: The type of key to search for.
- * @description: Parameter for @match.
- * @match: Function to rule on whether or not a key is the one required.
- * @no_state_check: Don't check if a matching key is bad
- *
- * Search the supplied keyring tree for a key that matches the criteria given.
- * The root keyring and any linked keyrings must grant Search permission to the
- * caller to be searchable and keys can only be found if they too grant Search
- * to the caller. The possession flag on the root keyring pointer controls use
- * of the possessor bits in permissions checking of the entire tree.  In
- * addition, the LSM gets to forbid keyring searches and key matches.
- *
- * The search is performed as a breadth-then-depth search up to the prescribed
- * limit (KEYRING_SEARCH_MAX_DEPTH).
- *
- * Keys are matched to the type provided and are then filtered by the match
- * function, which is given the description to use in any way it sees fit.  The
- * match function may use any attributes of a key that it wishes to to
- * determine the match.  Normally the match function from the key type would be
- * used.
- *
- * RCU is used to prevent the keyring key lists from disappearing without the
- * need to take lots of locks.
- *
- * Returns a pointer to the found key and increments the key usage count if
- * successful; -EAGAIN if no matching keys were found, or if expired or revoked
- * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
- * specified keyring wasn't a keyring.
- *
- * In the case of a successful return, the possession attribute from
- * @keyring_ref is propagated to the returned key reference.
+/*
+ * Iteration function to consider each key found.
  */
-key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-                            const struct cred *cred,
-                            struct key_type *type,
-                            const void *description,
-                            key_match_func_t match,
-                            bool no_state_check)
+static int keyring_search_iterator(const void *object, void *iterator_data)
 {
-       struct {
-               /* Need a separate keylist pointer for RCU purposes */
-               struct key *keyring;
-               struct keyring_list *keylist;
-               int kix;
-       } stack[KEYRING_SEARCH_MAX_DEPTH];
-
-       struct keyring_list *keylist;
-       struct timespec now;
-       unsigned long possessed, kflags;
-       struct key *keyring, *key;
-       key_ref_t key_ref;
-       long err;
-       int sp, nkeys, kix;
+       struct keyring_search_context *ctx = iterator_data;
+       const struct key *key = keyring_ptr_to_key(object);
+       unsigned long kflags = key->flags;
 
-       keyring = key_ref_to_ptr(keyring_ref);
-       possessed = is_key_possessed(keyring_ref);
-       key_check(keyring);
+       kenter("{%d}", key->serial);
 
-       /* top keyring must have search permission to begin the search */
-       err = key_task_permission(keyring_ref, cred, KEY_SEARCH);
-       if (err < 0) {
-               key_ref = ERR_PTR(err);
-               goto error;
+       /* ignore keys not of this type */
+       if (key->type != ctx->index_key.type) {
+               kleave(" = 0 [!type]");
+               return 0;
        }
 
-       key_ref = ERR_PTR(-ENOTDIR);
-       if (keyring->type != &key_type_keyring)
-               goto error;
+       /* skip invalidated, revoked and expired keys */
+       if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+               if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+                             (1 << KEY_FLAG_REVOKED))) {
+                       ctx->result = ERR_PTR(-EKEYREVOKED);
+                       kleave(" = %d [invrev]", ctx->skipped_ret);
+                       goto skipped;
+               }
 
-       rcu_read_lock();
+               if (key->expiry && ctx->now.tv_sec >= key->expiry) {
+                       ctx->result = ERR_PTR(-EKEYEXPIRED);
+                       kleave(" = %d [expire]", ctx->skipped_ret);
+                       goto skipped;
+               }
+       }
 
-       now = current_kernel_time();
-       err = -EAGAIN;
-       sp = 0;
-
-       /* firstly we should check to see if this top-level keyring is what we
-        * are looking for */
-       key_ref = ERR_PTR(-EAGAIN);
-       kflags = keyring->flags;
-       if (keyring->type == type && match(keyring, description)) {
-               key = keyring;
-               if (no_state_check)
-                       goto found;
+       /* keys that don't match */
+       if (!ctx->match(key, ctx->match_data)) {
+               kleave(" = 0 [!match]");
+               return 0;
+       }
 
-               /* check it isn't negative and hasn't expired or been
-                * revoked */
-               if (kflags & (1 << KEY_FLAG_REVOKED))
-                       goto error_2;
-               if (key->expiry && now.tv_sec >= key->expiry)
-                       goto error_2;
-               key_ref = ERR_PTR(key->type_data.reject_error);
-               if (kflags & (1 << KEY_FLAG_NEGATIVE))
-                       goto error_2;
-               goto found;
+       /* key must have search permissions */
+       if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+           key_task_permission(make_key_ref(key, ctx->possessed),
+                               ctx->cred, KEY_SEARCH) < 0) {
+               ctx->result = ERR_PTR(-EACCES);
+               kleave(" = %d [!perm]", ctx->skipped_ret);
+               goto skipped;
        }
 
-       /* otherwise, the top keyring must not be revoked, expired, or
-        * negatively instantiated if we are to search it */
-       key_ref = ERR_PTR(-EAGAIN);
-       if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-                     (1 << KEY_FLAG_REVOKED) |
-                     (1 << KEY_FLAG_NEGATIVE)) ||
-           (keyring->expiry && now.tv_sec >= keyring->expiry))
-               goto error_2;
-
-       /* start processing a new keyring */
-descend:
-       kflags = keyring->flags;
-       if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-                     (1 << KEY_FLAG_REVOKED)))
-               goto not_this_keyring;
+       if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+               /* we set a different error code if we pass a negative key */
+               if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
+                       smp_rmb();
+                       ctx->result = ERR_PTR(key->type_data.reject_error);
+                       kleave(" = %d [neg]", ctx->skipped_ret);
+                       goto skipped;
+               }
+       }
 
-       keylist = rcu_dereference(keyring->payload.subscriptions);
-       if (!keylist)
-               goto not_this_keyring;
+       /* Found */
+       ctx->result = make_key_ref(key, ctx->possessed);
+       kleave(" = 1 [found]");
+       return 1;
 
-       /* iterate through the keys in this keyring first */
-       nkeys = keylist->nkeys;
-       smp_rmb();
-       for (kix = 0; kix < nkeys; kix++) {
-               key = rcu_dereference(keylist->keys[kix]);
-               kflags = key->flags;
+skipped:
+       return ctx->skipped_ret;
+}
 
-               /* ignore keys not of this type */
-               if (key->type != type)
-                       continue;
+/*
+ * Search inside a keyring for a key.  We can search by walking to it
+ * directly based on its index-key or we can iterate over the entire
+ * tree looking for it, based on the match function.
+ */
+static int search_keyring(struct key *keyring, struct keyring_search_context *ctx)
+{
+       if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) ==
+           KEYRING_SEARCH_LOOKUP_DIRECT) {
+               const void *object;
+
+               object = assoc_array_find(&keyring->keys,
+                                         &keyring_assoc_array_ops,
+                                         &ctx->index_key);
+               return object ? ctx->iterator(object, ctx) : 0;
+       }
+       return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx);
+}
 
-               /* skip invalidated, revoked and expired keys */
-               if (!no_state_check) {
-                       if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-                                     (1 << KEY_FLAG_REVOKED)))
-                               continue;
+/*
+ * Search a tree of keyrings that point to other keyrings up to the maximum
+ * depth.
+ */
+static bool search_nested_keyrings(struct key *keyring,
+                                  struct keyring_search_context *ctx)
+{
+       struct {
+               struct key *keyring;
+               struct assoc_array_node *node;
+               int slot;
+       } stack[KEYRING_SEARCH_MAX_DEPTH];
 
-                       if (key->expiry && now.tv_sec >= key->expiry)
-                               continue;
-               }
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *ptr;
+       struct key *key;
+       int sp = 0, slot;
 
-               /* keys that don't match */
-               if (!match(key, description))
-                       continue;
+       kenter("{%d},{%s,%s}",
+              keyring->serial,
+              ctx->index_key.type->name,
+              ctx->index_key.description);
 
-               /* key must have search permissions */
-               if (key_task_permission(make_key_ref(key, possessed),
-                                       cred, KEY_SEARCH) < 0)
-                       continue;
+       if (ctx->index_key.description)
+               ctx->index_key.desc_len = strlen(ctx->index_key.description);
 
-               if (no_state_check)
+       /* Check to see if this top-level keyring is what we are looking for
+        * and whether it is valid or not.
+        */
+       if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE ||
+           keyring_compare_object(keyring, &ctx->index_key)) {
+               ctx->skipped_ret = 2;
+               ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK;
+               switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) {
+               case 1:
                        goto found;
-
-               /* we set a different error code if we pass a negative key */
-               if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
-                       err = key->type_data.reject_error;
-                       continue;
+               case 2:
+                       return false;
+               default:
+                       break;
                }
+       }
+
+       ctx->skipped_ret = 0;
+       if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+               ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK;
 
+       /* Start processing a new keyring */
+descend_to_keyring:
+       kdebug("descend to %d", keyring->serial);
+       if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                             (1 << KEY_FLAG_REVOKED)))
+               goto not_this_keyring;
+
+       /* Search through the keys in this keyring before searching its
+        * subtrees.
+        */
+       if (search_keyring(keyring, ctx))
                goto found;
-       }
 
-       /* search through the keyrings nested in this one */
-       kix = 0;
-ascend:
-       nkeys = keylist->nkeys;
-       smp_rmb();
-       for (; kix < nkeys; kix++) {
-               key = rcu_dereference(keylist->keys[kix]);
-               if (key->type != &key_type_keyring)
-                       continue;
+       /* Then manually iterate through the keyrings nested in this one.
+        *
+        * Start from the root node of the index tree.  Because of the way the
+        * hash function has been set up, keyrings cluster on the leftmost
+        * branch of the root node (root slot 0) or in the root node itself.
+        * Non-keyrings avoid the leftmost branch of the root entirely (root
+        * slots 1-15).
+        */
+       ptr = ACCESS_ONCE(keyring->keys.root);
+       if (!ptr)
+               goto not_this_keyring;
 
-               /* recursively search nested keyrings
-                * - only search keyrings for which we have search permission
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               /* If the root is a shortcut, either the keyring only contains
+                * keyring pointers (everything clusters behind root slot 0) or
+                * doesn't contain any keyring pointers.
                 */
-               if (sp >= KEYRING_SEARCH_MAX_DEPTH)
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               smp_read_barrier_depends();
+               if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
+                       goto not_this_keyring;
+
+               ptr = ACCESS_ONCE(shortcut->next_node);
+               node = assoc_array_ptr_to_node(ptr);
+               goto begin_node;
+       }
+
+       node = assoc_array_ptr_to_node(ptr);
+       smp_read_barrier_depends();
+
+       ptr = node->slots[0];
+       if (!assoc_array_ptr_is_meta(ptr))
+               goto begin_node;
+
+descend_to_node:
+       /* Descend to a more distal node in this keyring's content tree and go
+        * through that.
+        */
+       kdebug("descend");
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               smp_read_barrier_depends();
+               ptr = ACCESS_ONCE(shortcut->next_node);
+               BUG_ON(!assoc_array_ptr_is_node(ptr));
+               node = assoc_array_ptr_to_node(ptr);
+       }
+
+begin_node:
+       kdebug("begin_node");
+       smp_read_barrier_depends();
+       slot = 0;
+ascend_to_node:
+       /* Go through the slots in a node */
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+
+               if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
+                       goto descend_to_node;
+
+               if (!keyring_ptr_is_keyring(ptr))
                        continue;
 
-               if (key_task_permission(make_key_ref(key, possessed),
-                                       cred, KEY_SEARCH) < 0)
+               key = keyring_ptr_to_key(ptr);
+
+               if (sp >= KEYRING_SEARCH_MAX_DEPTH) {
+                       if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) {
+                               ctx->result = ERR_PTR(-ELOOP);
+                               return false;
+                       }
+                       goto not_this_keyring;
+               }
+
+               /* Search a nested keyring */
+               if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+                   key_task_permission(make_key_ref(key, ctx->possessed),
+                                       ctx->cred, KEY_SEARCH) < 0)
                        continue;
 
                /* stack the current position */
                stack[sp].keyring = keyring;
-               stack[sp].keylist = keylist;
-               stack[sp].kix = kix;
+               stack[sp].node = node;
+               stack[sp].slot = slot;
                sp++;
 
                /* begin again with the new keyring */
                keyring = key;
-               goto descend;
+               goto descend_to_keyring;
        }
 
-       /* the keyring we're looking at was disqualified or didn't contain a
-        * matching key */
+       /* We've dealt with all the slots in the current node, so now we need
+        * to ascend to the parent and continue processing there.
+        */
+       ptr = ACCESS_ONCE(node->back_pointer);
+       slot = node->parent_slot;
+
+       if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               smp_read_barrier_depends();
+               ptr = ACCESS_ONCE(shortcut->back_pointer);
+               slot = shortcut->parent_slot;
+       }
+       if (!ptr)
+               goto not_this_keyring;
+       node = assoc_array_ptr_to_node(ptr);
+       smp_read_barrier_depends();
+       slot++;
+
+       /* If we've ascended to the root (zero backpointer), we must have just
+        * finished processing the leftmost branch rather than the root slots -
+        * so there can't be any more keyrings for us to find.
+        */
+       if (node->back_pointer) {
+               kdebug("ascend %d", slot);
+               goto ascend_to_node;
+       }
+
+       /* The keyring we're looking at was disqualified or didn't contain a
+        * matching key.
+        */
 not_this_keyring:
-       if (sp > 0) {
-               /* resume the processing of a keyring higher up in the tree */
-               sp--;
-               keyring = stack[sp].keyring;
-               keylist = stack[sp].keylist;
-               kix = stack[sp].kix + 1;
-               goto ascend;
+       kdebug("not_this_keyring %d", sp);
+       if (sp <= 0) {
+               kleave(" = false");
+               return false;
        }
 
-       key_ref = ERR_PTR(err);
-       goto error_2;
+       /* Resume the processing of a keyring higher up in the tree */
+       sp--;
+       keyring = stack[sp].keyring;
+       node = stack[sp].node;
+       slot = stack[sp].slot + 1;
+       kdebug("ascend to %d [%d]", keyring->serial, slot);
+       goto ascend_to_node;
 
-       /* we found a viable match */
+       /* We found a viable match */
 found:
-       atomic_inc(&key->usage);
-       key->last_used_at = now.tv_sec;
-       keyring->last_used_at = now.tv_sec;
-       while (sp > 0)
-               stack[--sp].keyring->last_used_at = now.tv_sec;
+       key = key_ref_to_ptr(ctx->result);
        key_check(key);
-       key_ref = make_key_ref(key, possessed);
-error_2:
+       if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) {
+               key->last_used_at = ctx->now.tv_sec;
+               keyring->last_used_at = ctx->now.tv_sec;
+               while (sp > 0)
+                       stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
+       }
+       kleave(" = true");
+       return true;
+}
+
+/**
+ * keyring_search_aux - Search a keyring tree for a key matching some criteria
+ * @keyring_ref: A pointer to the keyring with possession indicator.
+ * @ctx: The keyring search context.
+ *
+ * Search the supplied keyring tree for a key that matches the criteria given.
+ * The root keyring and any linked keyrings must grant Search permission to the
+ * caller to be searchable and keys can only be found if they too grant Search
+ * to the caller. The possession flag on the root keyring pointer controls use
+ * of the possessor bits in permissions checking of the entire tree.  In
+ * addition, the LSM gets to forbid keyring searches and key matches.
+ *
+ * The search is performed as a breadth-then-depth search up to the prescribed
+ * limit (KEYRING_SEARCH_MAX_DEPTH).
+ *
+ * Keys are matched to the type provided and are then filtered by the match
+ * function, which is given the description to use in any way it sees fit.  The
+ * match function may use any attributes of a key that it wishes to
+ * determine the match.  Normally the match function from the key type would be
+ * used.
+ *
+ * RCU can be used to prevent the keyring key lists from disappearing without
+ * the need to take lots of locks.
+ *
+ * Returns a pointer to the found key and increments the key usage count if
+ * successful; -EAGAIN if no matching keys were found, or if expired or revoked
+ * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
+ * specified keyring wasn't a keyring.
+ *
+ * In the case of a successful return, the possession attribute from
+ * @keyring_ref is propagated to the returned key reference.
+ */
+key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+                            struct keyring_search_context *ctx)
+{
+       struct key *keyring;
+       long err;
+
+       ctx->iterator = keyring_search_iterator;
+       ctx->possessed = is_key_possessed(keyring_ref);
+       ctx->result = ERR_PTR(-EAGAIN);
+
+       keyring = key_ref_to_ptr(keyring_ref);
+       key_check(keyring);
+
+       if (keyring->type != &key_type_keyring)
+               return ERR_PTR(-ENOTDIR);
+
+       if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) {
+               err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+               if (err < 0)
+                       return ERR_PTR(err);
+       }
+
+       rcu_read_lock();
+       ctx->now = current_kernel_time();
+       if (search_nested_keyrings(keyring, ctx))
+               __key_get(key_ref_to_ptr(ctx->result));
        rcu_read_unlock();
-error:
-       return key_ref;
+       return ctx->result;
 }
 
 /**
@@ -507,77 +864,73 @@ error:
  * @description: The name of the keyring we want to find.
  *
  * As keyring_search_aux() above, but using the current task's credentials and
- * type's default matching function.
+ * type's default matching function and preferred search method.
  */
 key_ref_t keyring_search(key_ref_t keyring,
                         struct key_type *type,
                         const char *description)
 {
-       if (!type->match)
+       struct keyring_search_context ctx = {
+               .index_key.type         = type,
+               .index_key.description  = description,
+               .cred                   = current_cred(),
+               .match                  = type->match,
+               .match_data             = description,
+               .flags                  = (type->def_lookup_type |
+                                          KEYRING_SEARCH_DO_STATE_CHECK),
+       };
+
+       if (!ctx.match)
                return ERR_PTR(-ENOKEY);
 
-       return keyring_search_aux(keyring, current->cred,
-                                 type, description, type->match, false);
+       return keyring_search_aux(keyring, &ctx);
 }
 EXPORT_SYMBOL(keyring_search);
 
 /*
- * Search the given keyring only (no recursion).
+ * Search the given keyring for a key that might be updated.
  *
  * The caller must guarantee that the keyring is a keyring and that the
- * permission is granted to search the keyring as no check is made here.
- *
- * RCU is used to make it unnecessary to lock the keyring key list here.
+ * permission is granted to modify the keyring as no check is made here.  The
+ * caller must also hold a lock on the keyring semaphore.
  *
  * Returns a pointer to the found key with usage count incremented if
- * successful and returns -ENOKEY if not found.  Revoked keys and keys not
- * providing the requested permission are skipped over.
+ * successful and returns NULL if not found.  Revoked and invalidated keys are
+ * skipped over.
  *
  * If successful, the possession indicator is propagated from the keyring ref
  * to the returned key reference.
  */
-key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-                              const struct key_type *ktype,
-                              const char *description,
-                              key_perm_t perm)
+key_ref_t find_key_to_update(key_ref_t keyring_ref,
+                            const struct keyring_index_key *index_key)
 {
-       struct keyring_list *klist;
-       unsigned long possessed;
        struct key *keyring, *key;
-       int nkeys, loop;
+       const void *object;
 
        keyring = key_ref_to_ptr(keyring_ref);
-       possessed = is_key_possessed(keyring_ref);
 
-       rcu_read_lock();
+       kenter("{%d},{%s,%s}",
+              keyring->serial, index_key->type->name, index_key->description);
 
-       klist = rcu_dereference(keyring->payload.subscriptions);
-       if (klist) {
-               nkeys = klist->nkeys;
-               smp_rmb();
-               for (loop = 0; loop < nkeys ; loop++) {
-                       key = rcu_dereference(klist->keys[loop]);
-                       if (key->type == ktype &&
-                           (!key->type->match ||
-                            key->type->match(key, description)) &&
-                           key_permission(make_key_ref(key, possessed),
-                                          perm) == 0 &&
-                           !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-                                           (1 << KEY_FLAG_REVOKED)))
-                           )
-                               goto found;
-               }
-       }
+       object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops,
+                                 index_key);
 
-       rcu_read_unlock();
-       return ERR_PTR(-ENOKEY);
+       if (object)
+               goto found;
+
+       kleave(" = NULL");
+       return NULL;
 
 found:
-       atomic_inc(&key->usage);
-       keyring->last_used_at = key->last_used_at =
-               current_kernel_time().tv_sec;
-       rcu_read_unlock();
-       return make_key_ref(key, possessed);
+       key = keyring_ptr_to_key(object);
+       if (key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                         (1 << KEY_FLAG_REVOKED))) {
+               kleave(" = NULL [x]");
+               return NULL;
+       }
+       __key_get(key);
+       kleave(" = {%d}", key->serial);
+       return make_key_ref(key, is_key_possessed(keyring_ref));
 }
 
 /*
@@ -640,6 +993,19 @@ out:
        return keyring;
 }
 
+static int keyring_detect_cycle_iterator(const void *object,
+                                        void *iterator_data)
+{
+       struct keyring_search_context *ctx = iterator_data;
+       const struct key *key = keyring_ptr_to_key(object);
+
+       kenter("{%d}", key->serial);
+
+       BUG_ON(key != ctx->match_data);
+       ctx->result = ERR_PTR(-EDEADLK);
+       return 1;
+}
+
 /*
  * See if a cycle will will be created by inserting acyclic tree B in acyclic
  * tree A at the topmost level (ie: as a direct child of A).
@@ -649,116 +1015,39 @@ out:
  */
 static int keyring_detect_cycle(struct key *A, struct key *B)
 {
-       struct {
-               struct keyring_list *keylist;
-               int kix;
-       } stack[KEYRING_SEARCH_MAX_DEPTH];
-
-       struct keyring_list *keylist;
-       struct key *subtree, *key;
-       int sp, nkeys, kix, ret;
+       struct keyring_search_context ctx = {
+               .index_key      = A->index_key,
+               .match_data     = A,
+               .iterator       = keyring_detect_cycle_iterator,
+               .flags          = (KEYRING_SEARCH_LOOKUP_DIRECT |
+                                  KEYRING_SEARCH_NO_STATE_CHECK |
+                                  KEYRING_SEARCH_NO_UPDATE_TIME |
+                                  KEYRING_SEARCH_NO_CHECK_PERM |
+                                  KEYRING_SEARCH_DETECT_TOO_DEEP),
+       };
 
        rcu_read_lock();
-
-       ret = -EDEADLK;
-       if (A == B)
-               goto cycle_detected;
-
-       subtree = B;
-       sp = 0;
-
-       /* start processing a new keyring */
-descend:
-       if (test_bit(KEY_FLAG_REVOKED, &subtree->flags))
-               goto not_this_keyring;
-
-       keylist = rcu_dereference(subtree->payload.subscriptions);
-       if (!keylist)
-               goto not_this_keyring;
-       kix = 0;
-
-ascend:
-       /* iterate through the remaining keys in this keyring */
-       nkeys = keylist->nkeys;
-       smp_rmb();
-       for (; kix < nkeys; kix++) {
-               key = rcu_dereference(keylist->keys[kix]);
-
-               if (key == A)
-                       goto cycle_detected;
-
-               /* recursively check nested keyrings */
-               if (key->type == &key_type_keyring) {
-                       if (sp >= KEYRING_SEARCH_MAX_DEPTH)
-                               goto too_deep;
-
-                       /* stack the current position */
-                       stack[sp].keylist = keylist;
-                       stack[sp].kix = kix;
-                       sp++;
-
-                       /* begin again with the new keyring */
-                       subtree = key;
-                       goto descend;
-               }
-       }
-
-       /* the keyring we're looking at was disqualified or didn't contain a
-        * matching key */
-not_this_keyring:
-       if (sp > 0) {
-               /* resume the checking of a keyring higher up in the tree */
-               sp--;
-               keylist = stack[sp].keylist;
-               kix = stack[sp].kix + 1;
-               goto ascend;
-       }
-
-       ret = 0; /* no cycles detected */
-
-error:
+       search_nested_keyrings(B, &ctx);
        rcu_read_unlock();
-       return ret;
-
-too_deep:
-       ret = -ELOOP;
-       goto error;
-
-cycle_detected:
-       ret = -EDEADLK;
-       goto error;
-}
-
-/*
- * Dispose of a keyring list after the RCU grace period, freeing the unlinked
- * key
- */
-static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
-{
-       struct keyring_list *klist =
-               container_of(rcu, struct keyring_list, rcu);
-
-       if (klist->delkey != USHRT_MAX)
-               key_put(rcu_access_pointer(klist->keys[klist->delkey]));
-       kfree(klist);
+       return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result);
 }
 
 /*
  * Preallocate memory so that a key can be linked into to a keyring.
  */
-int __key_link_begin(struct key *keyring, const struct key_type *type,
-                    const char *description, unsigned long *_prealloc)
+int __key_link_begin(struct key *keyring,
+                    const struct keyring_index_key *index_key,
+                    struct assoc_array_edit **_edit)
        __acquires(&keyring->sem)
        __acquires(&keyring_serialise_link_sem)
 {
-       struct keyring_list *klist, *nklist;
-       unsigned long prealloc;
-       unsigned max;
-       time_t lowest_lru;
-       size_t size;
-       int loop, lru, ret;
+       struct assoc_array_edit *edit;
+       int ret;
+
+       kenter("%d,%s,%s,",
+              keyring->serial, index_key->type->name, index_key->description);
 
-       kenter("%d,%s,%s,", key_serial(keyring), type->name, description);
+       BUG_ON(index_key->desc_len == 0);
 
        if (keyring->type != &key_type_keyring)
                return -ENOTDIR;
@@ -771,100 +1060,39 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
 
        /* serialise link/link calls to prevent parallel calls causing a cycle
         * when linking two keyring in opposite orders */
-       if (type == &key_type_keyring)
+       if (index_key->type == &key_type_keyring)
                down_write(&keyring_serialise_link_sem);
 
-       klist = rcu_dereference_locked_keyring(keyring);
-
-       /* see if there's a matching key we can displace */
-       lru = -1;
-       if (klist && klist->nkeys > 0) {
-               lowest_lru = TIME_T_MAX;
-               for (loop = klist->nkeys - 1; loop >= 0; loop--) {
-                       struct key *key = rcu_deref_link_locked(klist, loop,
-                                                               keyring);
-                       if (key->type == type &&
-                           strcmp(key->description, description) == 0) {
-                               /* Found a match - we'll replace the link with
-                                * one to the new key.  We record the slot
-                                * position.
-                                */
-                               klist->delkey = loop;
-                               prealloc = 0;
-                               goto done;
-                       }
-                       if (key->last_used_at < lowest_lru) {
-                               lowest_lru = key->last_used_at;
-                               lru = loop;
-                       }
-               }
-       }
-
-       /* If the keyring is full then do an LRU discard */
-       if (klist &&
-           klist->nkeys == klist->maxkeys &&
-           klist->maxkeys >= MAX_KEYRING_LINKS) {
-               kdebug("LRU discard %d\n", lru);
-               klist->delkey = lru;
-               prealloc = 0;
-               goto done;
-       }
-
-       /* check that we aren't going to overrun the user's quota */
-       ret = key_payload_reserve(keyring,
-                                 keyring->datalen + KEYQUOTA_LINK_BYTES);
-       if (ret < 0)
+       /* Create an edit script that will insert/replace the key in the
+        * keyring tree.
+        */
+       edit = assoc_array_insert(&keyring->keys,
+                                 &keyring_assoc_array_ops,
+                                 index_key,
+                                 NULL);
+       if (IS_ERR(edit)) {
+               ret = PTR_ERR(edit);
                goto error_sem;
+       }
 
-       if (klist && klist->nkeys < klist->maxkeys) {
-               /* there's sufficient slack space to append directly */
-               klist->delkey = klist->nkeys;
-               prealloc = KEY_LINK_FIXQUOTA;
-       } else {
-               /* grow the key list */
-               max = 4;
-               if (klist) {
-                       max += klist->maxkeys;
-                       if (max > MAX_KEYRING_LINKS)
-                               max = MAX_KEYRING_LINKS;
-                       BUG_ON(max <= klist->maxkeys);
-               }
-
-               size = sizeof(*klist) + sizeof(struct key *) * max;
-
-               ret = -ENOMEM;
-               nklist = kmalloc(size, GFP_KERNEL);
-               if (!nklist)
-                       goto error_quota;
-
-               nklist->maxkeys = max;
-               if (klist) {
-                       memcpy(nklist->keys, klist->keys,
-                              sizeof(struct key *) * klist->nkeys);
-                       nklist->delkey = klist->nkeys;
-                       nklist->nkeys = klist->nkeys + 1;
-                       klist->delkey = USHRT_MAX;
-               } else {
-                       nklist->nkeys = 1;
-                       nklist->delkey = 0;
-               }
-
-               /* add the key into the new space */
-               RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL);
-               prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA;
+       /* If we're not replacing a link in-place then we're going to need some
+        * extra quota.
+        */
+       if (!edit->dead_leaf) {
+               ret = key_payload_reserve(keyring,
+                                         keyring->datalen + KEYQUOTA_LINK_BYTES);
+               if (ret < 0)
+                       goto error_cancel;
        }
 
-done:
-       *_prealloc = prealloc;
+       *_edit = edit;
        kleave(" = 0");
        return 0;
 
-error_quota:
-       /* undo the quota changes */
-       key_payload_reserve(keyring,
-                           keyring->datalen - KEYQUOTA_LINK_BYTES);
+error_cancel:
+       assoc_array_cancel_edit(edit);
 error_sem:
-       if (type == &key_type_keyring)
+       if (index_key->type == &key_type_keyring)
                up_write(&keyring_serialise_link_sem);
 error_krsem:
        up_write(&keyring->sem);
@@ -895,60 +1123,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key)
  * holds at most one link to any given key of a particular type+description
  * combination.
  */
-void __key_link(struct key *keyring, struct key *key,
-               unsigned long *_prealloc)
+void __key_link(struct key *key, struct assoc_array_edit **_edit)
 {
-       struct keyring_list *klist, *nklist;
-       struct key *discard;
-
-       nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA);
-       *_prealloc = 0;
-
-       kenter("%d,%d,%p", keyring->serial, key->serial, nklist);
-
-       klist = rcu_dereference_locked_keyring(keyring);
-
-       atomic_inc(&key->usage);
-       keyring->last_used_at = key->last_used_at =
-               current_kernel_time().tv_sec;
-
-       /* there's a matching key we can displace or an empty slot in a newly
-        * allocated list we can fill */
-       if (nklist) {
-               kdebug("reissue %hu/%hu/%hu",
-                      nklist->delkey, nklist->nkeys, nklist->maxkeys);
-
-               RCU_INIT_POINTER(nklist->keys[nklist->delkey], key);
-
-               rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
-               /* dispose of the old keyring list and, if there was one, the
-                * displaced key */
-               if (klist) {
-                       kdebug("dispose %hu/%hu/%hu",
-                              klist->delkey, klist->nkeys, klist->maxkeys);
-                       call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
-               }
-       } else if (klist->delkey < klist->nkeys) {
-               kdebug("replace %hu/%hu/%hu",
-                      klist->delkey, klist->nkeys, klist->maxkeys);
-
-               discard = rcu_dereference_protected(
-                       klist->keys[klist->delkey],
-                       rwsem_is_locked(&keyring->sem));
-               rcu_assign_pointer(klist->keys[klist->delkey], key);
-               /* The garbage collector will take care of RCU
-                * synchronisation */
-               key_put(discard);
-       } else {
-               /* there's sufficient slack space to append directly */
-               kdebug("append %hu/%hu/%hu",
-                      klist->delkey, klist->nkeys, klist->maxkeys);
-
-               RCU_INIT_POINTER(klist->keys[klist->delkey], key);
-               smp_wmb();
-               klist->nkeys++;
-       }
+       __key_get(key);
+       assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key));
+       assoc_array_apply_edit(*_edit);
+       *_edit = NULL;
 }
 
 /*
@@ -956,24 +1136,22 @@ void __key_link(struct key *keyring, struct key *key,
  *
  * Must be called with __key_link_begin() having being called.
  */
-void __key_link_end(struct key *keyring, struct key_type *type,
-                   unsigned long prealloc)
+void __key_link_end(struct key *keyring,
+                   const struct keyring_index_key *index_key,
+                   struct assoc_array_edit *edit)
        __releases(&keyring->sem)
        __releases(&keyring_serialise_link_sem)
 {
-       BUG_ON(type == NULL);
-       BUG_ON(type->name == NULL);
-       kenter("%d,%s,%lx", keyring->serial, type->name, prealloc);
+       BUG_ON(index_key->type == NULL);
+       kenter("%d,%s,", keyring->serial, index_key->type->name);
 
-       if (type == &key_type_keyring)
+       if (index_key->type == &key_type_keyring)
                up_write(&keyring_serialise_link_sem);
 
-       if (prealloc) {
-               if (prealloc & KEY_LINK_FIXQUOTA)
-                       key_payload_reserve(keyring,
-                                           keyring->datalen -
-                                           KEYQUOTA_LINK_BYTES);
-               kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA));
+       if (edit && !edit->dead_leaf)
+               key_payload_reserve(keyring,
+                                   keyring->datalen - KEYQUOTA_LINK_BYTES);
+       if (edit) {
+               assoc_array_cancel_edit(edit);
        }
        up_write(&keyring->sem);
 }
@@ -1000,20 +1178,28 @@ void __key_link_end(struct key *keyring, struct key_type *type,
  */
 int key_link(struct key *keyring, struct key *key)
 {
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
        int ret;
 
+       kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage));
+
        key_check(keyring);
        key_check(key);
 
-       ret = __key_link_begin(keyring, key->type, key->description, &prealloc);
+       if (test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags) &&
+           !test_bit(KEY_FLAG_TRUSTED, &key->flags))
+               return -EPERM;
+
+       ret = __key_link_begin(keyring, &key->index_key, &edit);
        if (ret == 0) {
+               kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage));
                ret = __key_link_check_live_key(keyring, key);
                if (ret == 0)
-                       __key_link(keyring, key, &prealloc);
-               __key_link_end(keyring, key->type, prealloc);
+                       __key_link(key, &edit);
+               __key_link_end(keyring, &key->index_key, edit);
        }
 
+       kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage));
        return ret;
 }
 EXPORT_SYMBOL(key_link);
@@ -1037,90 +1223,37 @@ EXPORT_SYMBOL(key_link);
  */
 int key_unlink(struct key *keyring, struct key *key)
 {
-       struct keyring_list *klist, *nklist;
-       int loop, ret;
+       struct assoc_array_edit *edit;
+       int ret;
 
        key_check(keyring);
        key_check(key);
 
-       ret = -ENOTDIR;
        if (keyring->type != &key_type_keyring)
-               goto error;
+               return -ENOTDIR;
 
        down_write(&keyring->sem);
 
-       klist = rcu_dereference_locked_keyring(keyring);
-       if (klist) {
-               /* search the keyring for the key */
-               for (loop = 0; loop < klist->nkeys; loop++)
-                       if (rcu_access_pointer(klist->keys[loop]) == key)
-                               goto key_is_present;
+       edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
+                                 &key->index_key);
+       if (IS_ERR(edit)) {
+               ret = PTR_ERR(edit);
+               goto error;
        }
-
-       up_write(&keyring->sem);
        ret = -ENOENT;
-       goto error;
-
-key_is_present:
-       /* we need to copy the key list for RCU purposes */
-       nklist = kmalloc(sizeof(*klist) +
-                        sizeof(struct key *) * klist->maxkeys,
-                        GFP_KERNEL);
-       if (!nklist)
-               goto nomem;
-       nklist->maxkeys = klist->maxkeys;
-       nklist->nkeys = klist->nkeys - 1;
-
-       if (loop > 0)
-               memcpy(&nklist->keys[0],
-                      &klist->keys[0],
-                      loop * sizeof(struct key *));
-
-       if (loop < nklist->nkeys)
-               memcpy(&nklist->keys[loop],
-                      &klist->keys[loop + 1],
-                      (nklist->nkeys - loop) * sizeof(struct key *));
-
-       /* adjust the user's quota */
-       key_payload_reserve(keyring,
-                           keyring->datalen - KEYQUOTA_LINK_BYTES);
-
-       rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
-       up_write(&keyring->sem);
-
-       /* schedule for later cleanup */
-       klist->delkey = loop;
-       call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
+       if (edit == NULL)
+               goto error;
 
+       assoc_array_apply_edit(edit);
+       key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES);
        ret = 0;
 
 error:
-       return ret;
-nomem:
-       ret = -ENOMEM;
        up_write(&keyring->sem);
-       goto error;
+       return ret;
 }
 EXPORT_SYMBOL(key_unlink);
 
-/*
- * Dispose of a keyring list after the RCU grace period, releasing the keys it
- * links to.
- */
-static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
-{
-       struct keyring_list *klist;
-       int loop;
-
-       klist = container_of(rcu, struct keyring_list, rcu);
-
-       for (loop = klist->nkeys - 1; loop >= 0; loop--)
-               key_put(rcu_access_pointer(klist->keys[loop]));
-
-       kfree(klist);
-}
-
 /**
  * keyring_clear - Clear a keyring
  * @keyring: The keyring to clear.
@@ -1131,33 +1264,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
  */
 int keyring_clear(struct key *keyring)
 {
-       struct keyring_list *klist;
+       struct assoc_array_edit *edit;
        int ret;
 
-       ret = -ENOTDIR;
-       if (keyring->type == &key_type_keyring) {
-               /* detach the pointer block with the locks held */
-               down_write(&keyring->sem);
-
-               klist = rcu_dereference_locked_keyring(keyring);
-               if (klist) {
-                       /* adjust the quota */
-                       key_payload_reserve(keyring,
-                                           sizeof(struct keyring_list));
-
-                       rcu_assign_pointer(keyring->payload.subscriptions,
-                                          NULL);
-               }
-
-               up_write(&keyring->sem);
+       if (keyring->type != &key_type_keyring)
+               return -ENOTDIR;
 
-               /* free the keys after the locks have been dropped */
-               if (klist)
-                       call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+       down_write(&keyring->sem);
 
+       edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+       if (IS_ERR(edit)) {
+               ret = PTR_ERR(edit);
+       } else {
+               if (edit)
+                       assoc_array_apply_edit(edit);
+               key_payload_reserve(keyring, 0);
                ret = 0;
        }
 
+       up_write(&keyring->sem);
        return ret;
 }
 EXPORT_SYMBOL(keyring_clear);
@@ -1169,111 +1294,68 @@ EXPORT_SYMBOL(keyring_clear);
  */
 static void keyring_revoke(struct key *keyring)
 {
-       struct keyring_list *klist;
+       struct assoc_array_edit *edit;
+
+       edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+       if (!IS_ERR(edit)) {
+               if (edit)
+                       assoc_array_apply_edit(edit);
+               key_payload_reserve(keyring, 0);
+       }
+}
+
+static bool keyring_gc_select_iterator(void *object, void *iterator_data)
+{
+       struct key *key = keyring_ptr_to_key(object);
+       time_t *limit = iterator_data;
 
-       klist = rcu_dereference_locked_keyring(keyring);
+       if (key_is_dead(key, *limit))
+               return false;
+       key_get(key);
+       return true;
+}
 
-       /* adjust the quota */
-       key_payload_reserve(keyring, 0);
+static int keyring_gc_check_iterator(const void *object, void *iterator_data)
+{
+       const struct key *key = keyring_ptr_to_key(object);
+       time_t *limit = iterator_data;
 
-       if (klist) {
-               rcu_assign_pointer(keyring->payload.subscriptions, NULL);
-               call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
-       }
+       key_check(key);
+       return key_is_dead(key, *limit);
 }
 
 /*
- * Collect garbage from the contents of a keyring, replacing the old list with
- * a new one with the pointers all shuffled down.
+ * Garbage collect pointers from a keyring.
  *
- * Dead keys are classed as oned that are flagged as being dead or are revoked,
- * expired or negative keys that were revoked or expired before the specified
- * limit.
+ * Not called with any locks held.  The keyring's key struct will not be
+ * deallocated under us as only our caller may deallocate it.
  */
 void keyring_gc(struct key *keyring, time_t limit)
 {
-       struct keyring_list *klist, *new;
-       struct key *key;
-       int loop, keep, max;
-
-       kenter("{%x,%s}", key_serial(keyring), keyring->description);
-
-       down_write(&keyring->sem);
-
-       klist = rcu_dereference_locked_keyring(keyring);
-       if (!klist)
-               goto no_klist;
-
-       /* work out how many subscriptions we're keeping */
-       keep = 0;
-       for (loop = klist->nkeys - 1; loop >= 0; loop--)
-               if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring),
-                                limit))
-                       keep++;
-
-       if (keep == klist->nkeys)
-               goto just_return;
-
-       /* allocate a new keyring payload */
-       max = roundup(keep, 4);
-       new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
-                     GFP_KERNEL);
-       if (!new)
-               goto nomem;
-       new->maxkeys = max;
-       new->nkeys = 0;
-       new->delkey = 0;
-
-       /* install the live keys
-        * - must take care as expired keys may be updated back to life
-        */
-       keep = 0;
-       for (loop = klist->nkeys - 1; loop >= 0; loop--) {
-               key = rcu_deref_link_locked(klist, loop, keyring);
-               if (!key_is_dead(key, limit)) {
-                       if (keep >= max)
-                               goto discard_new;
-                       RCU_INIT_POINTER(new->keys[keep++], key_get(key));
-               }
-       }
-       new->nkeys = keep;
-
-       /* adjust the quota */
-       key_payload_reserve(keyring,
-                           sizeof(struct keyring_list) +
-                           KEYQUOTA_LINK_BYTES * keep);
+       int result;
 
-       if (keep == 0) {
-               rcu_assign_pointer(keyring->payload.subscriptions, NULL);
-               kfree(new);
-       } else {
-               rcu_assign_pointer(keyring->payload.subscriptions, new);
-       }
+       kenter("%x{%s}", keyring->serial, keyring->description ?: "");
 
-       up_write(&keyring->sem);
+       if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                             (1 << KEY_FLAG_REVOKED)))
+               goto dont_gc;
 
-       call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
-       kleave(" [yes]");
-       return;
-
-discard_new:
-       new->nkeys = keep;
-       keyring_clear_rcu_disposal(&new->rcu);
-       up_write(&keyring->sem);
-       kleave(" [discard]");
-       return;
-
-just_return:
-       up_write(&keyring->sem);
-       kleave(" [no dead]");
-       return;
+       /* scan the keyring looking for dead keys */
+       rcu_read_lock();
+       result = assoc_array_iterate(&keyring->keys,
+                                    keyring_gc_check_iterator, &limit);
+       rcu_read_unlock();
+       if (result == true)
+               goto do_gc;
 
-no_klist:
-       up_write(&keyring->sem);
-       kleave(" [no_klist]");
+dont_gc:
+       kleave(" [no gc]");
        return;
 
-nomem:
+do_gc:
+       down_write(&keyring->sem);
+       assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops,
+                      keyring_gc_select_iterator, &limit);
        up_write(&keyring->sem);
-       kleave(" [oom]");
+       kleave(" [gc]");
 }
diff --git a/security/keys/persistent.c b/security/keys/persistent.c
new file mode 100644 (file)
index 0000000..0ad3ee2
--- /dev/null
@@ -0,0 +1,167 @@
+/* General persistent per-UID keyrings register
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/user_namespace.h>
+#include "internal.h"
+
+unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */
+
+/*
+ * Allocate the ".persistent_register" keyring and install it in @ns as
+ * ns->persistent_keyring_register.  Returns 0, or the keyring_alloc() error.
+ * Called with the namespace's sem locked for writing.
+ */
+static int key_create_persistent_register(struct user_namespace *ns)
+{
+       struct key *reg = keyring_alloc(".persistent_register",
+                                       KUIDT_INIT(0), KGIDT_INIT(0),
+                                       current_cred(),
+                                       ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                        KEY_USR_VIEW | KEY_USR_READ),
+                                       KEY_ALLOC_NOT_IN_QUOTA, NULL);
+       if (IS_ERR(reg))
+               return PTR_ERR(reg);
+
+       ns->persistent_keyring_register = reg;
+       return 0;
+}
+
+/*
+ * Create the persistent keyring for @uid, creating the register first if it
+ * is absent; if the register exists and already holds a match for @index_key,
+ * return that instead.  Called with the namespace's sem locked for writing.
+ */
+static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid,
+                                      struct keyring_index_key *index_key)
+{
+       struct key *persistent;
+       key_ref_t reg_ref, persistent_ref;
+
+       if (!ns->persistent_keyring_register) {
+               long err = key_create_persistent_register(ns);
+               if (err < 0)
+                       return ERR_PTR(err);
+       } else {
+               reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+               persistent_ref = find_key_to_update(reg_ref, index_key);
+               if (persistent_ref)
+                       return persistent_ref;
+       }
+
+       persistent = keyring_alloc(index_key->description,
+                                  uid, INVALID_GID, current_cred(),
+                                  ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                   KEY_USR_VIEW | KEY_USR_READ),
+                                  KEY_ALLOC_NOT_IN_QUOTA,
+                                  ns->persistent_keyring_register);
+       if (IS_ERR(persistent))
+               return ERR_CAST(persistent);
+
+       return make_key_ref(persistent, true);
+}
+
+/*
+ * Find or create the "_persistent.<uid>" keyring, check KEY_LINK permission,
+ * link it to @dest_ref and set its timeout.  Returns its serial or an error.
+ */
+static long key_get_persistent(struct user_namespace *ns, kuid_t uid,
+                              key_ref_t dest_ref)
+{
+       struct keyring_index_key index_key;
+       struct key *persistent;
+       key_ref_t reg_ref, persistent_ref;
+       char buf[32];
+       long ret;
+
+       /* Look in the register if it exists */
+       index_key.type = &key_type_keyring;
+       index_key.description = buf;
+       index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid));
+
+       if (ns->persistent_keyring_register) {
+               reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+               down_read(&ns->persistent_keyring_register_sem);
+               persistent_ref = find_key_to_update(reg_ref, &index_key);
+               up_read(&ns->persistent_keyring_register_sem);
+
+               if (persistent_ref)
+                       goto found;
+       }
+
+       /* It wasn't in the register, so we'll need to create it.  We might
+        * also need to create the register.
+        */
+       down_write(&ns->persistent_keyring_register_sem);
+       persistent_ref = key_create_persistent(ns, uid, &index_key);
+       up_write(&ns->persistent_keyring_register_sem);
+       if (!IS_ERR(persistent_ref))
+               goto found;
+
+       return PTR_ERR(persistent_ref);
+
+found:
+       ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK);
+       if (ret == 0) {
+               persistent = key_ref_to_ptr(persistent_ref);
+               ret = key_link(key_ref_to_ptr(dest_ref), persistent);
+               if (ret == 0) {
+                       key_set_timeout(persistent, persistent_keyring_expiry);
+                       ret = persistent->serial;               
+               }
+       }
+
+       key_ref_put(persistent_ref);
+       return ret;
+}
+
+/*
+ * Resolve @_uid (-1 = current UID; others need own uid/euid or CAP_SETUID)
+ * and @destid (must be a keyring), then hand off to key_get_persistent().
+ */
+long keyctl_get_persistent(uid_t _uid, key_serial_t destid)
+{
+       struct user_namespace *ns = current_user_ns();
+       key_ref_t dest_ref;
+       kuid_t uid;
+       long ret;
+
+       /* -1 indicates the current user */
+       if (_uid == (uid_t)-1) {
+               uid = current_uid();
+       } else {
+               uid = make_kuid(ns, _uid);
+               if (!uid_valid(uid))
+                       return -EINVAL;
+
+               /* You can only see your own persistent cache if you're not
+                * sufficiently privileged.
+                */
+               if (!uid_eq(uid, current_uid()) &&
+                   !uid_eq(uid, current_euid()) &&
+                   !ns_capable(ns, CAP_SETUID))
+                       return -EPERM;
+       }
+
+       /* There must be a destination keyring */
+       dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE);
+       if (IS_ERR(dest_ref))
+               return PTR_ERR(dest_ref);
+       if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) {
+               ret = -ENOTDIR;
+               goto out_put_dest;
+       }
+
+       ret = key_get_persistent(ns, uid, dest_ref);
+
+out_put_dest:
+       key_ref_put(dest_ref);
+       return ret;
+}
index 217b6855e815cb851153fa08646d2bf145cee579..88e9a466940f642af60f61b407888155ba057be5 100644 (file)
@@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file *p, void *v)
 
 static int proc_keys_show(struct seq_file *m, void *v)
 {
-       const struct cred *cred = current_cred();
        struct rb_node *_p = v;
        struct key *key = rb_entry(_p, struct key, serial_node);
        struct timespec now;
@@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v)
        char xbuf[12];
        int rc;
 
+       struct keyring_search_context ctx = {
+               .index_key.type         = key->type,
+               .index_key.description  = key->description,
+               .cred                   = current_cred(),
+               .match                  = lookup_user_key_possessed,
+               .match_data             = key,
+               .flags                  = (KEYRING_SEARCH_NO_STATE_CHECK |
+                                          KEYRING_SEARCH_LOOKUP_DIRECT),
+       };
+
        key_ref = make_key_ref(key, 0);
 
        /* determine if the key is possessed by this process (a test we can
         * skip if the key does not indicate the possessor can view it
         */
        if (key->perm & KEY_POS_VIEW) {
-               skey_ref = search_my_process_keyrings(key->type, key,
-                                                     lookup_user_key_possessed,
-                                                     true, cred);
+               skey_ref = search_my_process_keyrings(&ctx);
                if (!IS_ERR(skey_ref)) {
                        key_ref_put(skey_ref);
                        key_ref = make_key_ref(key, 1);
@@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
         * - the caller holds a spinlock, and thus the RCU read lock, making our
         *   access to __current_cred() safe
         */
-       rc = key_task_permission(key_ref, cred, KEY_VIEW);
+       rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW);
        if (rc < 0)
                return 0;
 
index 42defae1e161632e93b13b8194af1a30a09f2492..0cf8a130a267ca58fbc5599787c93b9913cfc576 100644 (file)
@@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
                if (IS_ERR(keyring))
                        return PTR_ERR(keyring);
        } else {
-               atomic_inc(&keyring->usage);
+               __key_get(keyring);
        }
 
        /* install the keyring */
@@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk)
  * In the case of a successful return, the possession attribute is set on the
  * returned key reference.
  */
-key_ref_t search_my_process_keyrings(struct key_type *type,
-                                    const void *description,
-                                    key_match_func_t match,
-                                    bool no_state_check,
-                                    const struct cred *cred)
+key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
 {
        key_ref_t key_ref, ret, err;
 
@@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
        err = ERR_PTR(-EAGAIN);
 
        /* search the thread keyring first */
-       if (cred->thread_keyring) {
+       if (ctx->cred->thread_keyring) {
                key_ref = keyring_search_aux(
-                       make_key_ref(cred->thread_keyring, 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(ctx->cred->thread_keyring, 1), ctx);
                if (!IS_ERR(key_ref))
                        goto found;
 
@@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
        }
 
        /* search the process keyring second */
-       if (cred->process_keyring) {
+       if (ctx->cred->process_keyring) {
                key_ref = keyring_search_aux(
-                       make_key_ref(cred->process_keyring, 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(ctx->cred->process_keyring, 1), ctx);
                if (!IS_ERR(key_ref))
                        goto found;
 
@@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
        }
 
        /* search the session keyring */
-       if (cred->session_keyring) {
+       if (ctx->cred->session_keyring) {
                rcu_read_lock();
                key_ref = keyring_search_aux(
-                       make_key_ref(rcu_dereference(cred->session_keyring), 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1),
+                       ctx);
                rcu_read_unlock();
 
                if (!IS_ERR(key_ref))
@@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
                }
        }
        /* or search the user-session keyring */
-       else if (cred->user->session_keyring) {
+       else if (ctx->cred->user->session_keyring) {
                key_ref = keyring_search_aux(
-                       make_key_ref(cred->user->session_keyring, 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(ctx->cred->user->session_keyring, 1),
+                       ctx);
                if (!IS_ERR(key_ref))
                        goto found;
 
@@ -437,18 +431,14 @@ found:
  *
  * Return same as search_my_process_keyrings().
  */
-key_ref_t search_process_keyrings(struct key_type *type,
-                                 const void *description,
-                                 key_match_func_t match,
-                                 const struct cred *cred)
+key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
 {
        struct request_key_auth *rka;
        key_ref_t key_ref, ret = ERR_PTR(-EACCES), err;
 
        might_sleep();
 
-       key_ref = search_my_process_keyrings(type, description, match,
-                                            false, cred);
+       key_ref = search_my_process_keyrings(ctx);
        if (!IS_ERR(key_ref))
                goto found;
        err = key_ref;
@@ -457,18 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type,
         * search the keyrings of the process mentioned there
         * - we don't permit access to request_key auth keys via this method
         */
-       if (cred->request_key_auth &&
-           cred == current_cred() &&
-           type != &key_type_request_key_auth
+       if (ctx->cred->request_key_auth &&
+           ctx->cred == current_cred() &&
+           ctx->index_key.type != &key_type_request_key_auth
            ) {
+               const struct cred *cred = ctx->cred;
+
                /* defend against the auth key being revoked */
                down_read(&cred->request_key_auth->sem);
 
-               if (key_validate(cred->request_key_auth) == 0) {
-                       rka = cred->request_key_auth->payload.data;
+               if (key_validate(ctx->cred->request_key_auth) == 0) {
+                       rka = ctx->cred->request_key_auth->payload.data;
 
-                       key_ref = search_process_keyrings(type, description,
-                                                         match, rka->cred);
+                       ctx->cred = rka->cred;
+                       key_ref = search_process_keyrings(ctx);
+                       ctx->cred = cred;
 
                        up_read(&cred->request_key_auth->sem);
 
@@ -522,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target)
 key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
                          key_perm_t perm)
 {
+       struct keyring_search_context ctx = {
+               .match  = lookup_user_key_possessed,
+               .flags  = (KEYRING_SEARCH_NO_STATE_CHECK |
+                          KEYRING_SEARCH_LOOKUP_DIRECT),
+       };
        struct request_key_auth *rka;
-       const struct cred *cred;
        struct key *key;
        key_ref_t key_ref, skey_ref;
        int ret;
 
 try_again:
-       cred = get_current_cred();
+       ctx.cred = get_current_cred();
        key_ref = ERR_PTR(-ENOKEY);
 
        switch (id) {
        case KEY_SPEC_THREAD_KEYRING:
-               if (!cred->thread_keyring) {
+               if (!ctx.cred->thread_keyring) {
                        if (!(lflags & KEY_LOOKUP_CREATE))
                                goto error;
 
@@ -546,13 +543,13 @@ try_again:
                        goto reget_creds;
                }
 
-               key = cred->thread_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->thread_keyring;
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_PROCESS_KEYRING:
-               if (!cred->process_keyring) {
+               if (!ctx.cred->process_keyring) {
                        if (!(lflags & KEY_LOOKUP_CREATE))
                                goto error;
 
@@ -564,13 +561,13 @@ try_again:
                        goto reget_creds;
                }
 
-               key = cred->process_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->process_keyring;
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_SESSION_KEYRING:
-               if (!cred->session_keyring) {
+               if (!ctx.cred->session_keyring) {
                        /* always install a session keyring upon access if one
                         * doesn't exist yet */
                        ret = install_user_keyrings();
@@ -580,13 +577,13 @@ try_again:
                                ret = join_session_keyring(NULL);
                        else
                                ret = install_session_keyring(
-                                       cred->user->session_keyring);
+                                       ctx.cred->user->session_keyring);
 
                        if (ret < 0)
                                goto error;
                        goto reget_creds;
-               } else if (cred->session_keyring ==
-                          cred->user->session_keyring &&
+               } else if (ctx.cred->session_keyring ==
+                          ctx.cred->user->session_keyring &&
                           lflags & KEY_LOOKUP_CREATE) {
                        ret = join_session_keyring(NULL);
                        if (ret < 0)
@@ -595,33 +592,33 @@ try_again:
                }
 
                rcu_read_lock();
-               key = rcu_dereference(cred->session_keyring);
-               atomic_inc(&key->usage);
+               key = rcu_dereference(ctx.cred->session_keyring);
+               __key_get(key);
                rcu_read_unlock();
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_USER_KEYRING:
-               if (!cred->user->uid_keyring) {
+               if (!ctx.cred->user->uid_keyring) {
                        ret = install_user_keyrings();
                        if (ret < 0)
                                goto error;
                }
 
-               key = cred->user->uid_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->user->uid_keyring;
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_USER_SESSION_KEYRING:
-               if (!cred->user->session_keyring) {
+               if (!ctx.cred->user->session_keyring) {
                        ret = install_user_keyrings();
                        if (ret < 0)
                                goto error;
                }
 
-               key = cred->user->session_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->user->session_keyring;
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
@@ -631,29 +628,29 @@ try_again:
                goto error;
 
        case KEY_SPEC_REQKEY_AUTH_KEY:
-               key = cred->request_key_auth;
+               key = ctx.cred->request_key_auth;
                if (!key)
                        goto error;
 
-               atomic_inc(&key->usage);
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_REQUESTOR_KEYRING:
-               if (!cred->request_key_auth)
+               if (!ctx.cred->request_key_auth)
                        goto error;
 
-               down_read(&cred->request_key_auth->sem);
+               down_read(&ctx.cred->request_key_auth->sem);
                if (test_bit(KEY_FLAG_REVOKED,
-                            &cred->request_key_auth->flags)) {
+                            &ctx.cred->request_key_auth->flags)) {
                        key_ref = ERR_PTR(-EKEYREVOKED);
                        key = NULL;
                } else {
-                       rka = cred->request_key_auth->payload.data;
+                       rka = ctx.cred->request_key_auth->payload.data;
                        key = rka->dest_keyring;
-                       atomic_inc(&key->usage);
+                       __key_get(key);
                }
-               up_read(&cred->request_key_auth->sem);
+               up_read(&ctx.cred->request_key_auth->sem);
                if (!key)
                        goto error;
                key_ref = make_key_ref(key, 1);
@@ -673,9 +670,13 @@ try_again:
                key_ref = make_key_ref(key, 0);
 
                /* check to see if we possess the key */
-               skey_ref = search_process_keyrings(key->type, key,
-                                                  lookup_user_key_possessed,
-                                                  cred);
+               ctx.index_key.type              = key->type;
+               ctx.index_key.description       = key->description;
+               ctx.index_key.desc_len          = strlen(key->description);
+               ctx.match_data                  = key;
+               kdebug("check possessed");
+               skey_ref = search_process_keyrings(&ctx);
+               kdebug("possessed=%p", skey_ref);
 
                if (!IS_ERR(skey_ref)) {
                        key_put(key);
@@ -715,14 +716,14 @@ try_again:
                goto invalid_key;
 
        /* check the permissions */
-       ret = key_task_permission(key_ref, cred, perm);
+       ret = key_task_permission(key_ref, ctx.cred, perm);
        if (ret < 0)
                goto invalid_key;
 
        key->last_used_at = current_kernel_time().tv_sec;
 
 error:
-       put_cred(cred);
+       put_cred(ctx.cred);
        return key_ref;
 
 invalid_key:
@@ -733,7 +734,7 @@ invalid_key:
        /* if we attempted to install a keyring, then it may have caused new
         * creds to be installed */
 reget_creds:
-       put_cred(cred);
+       put_cred(ctx.cred);
        goto try_again;
 }
 
@@ -856,3 +857,13 @@ void key_change_session_keyring(struct callback_head *twork)
 
        commit_creds(new);
 }
+
+/*
+ * Late initcall: make sure root's user and user-session keyrings exist.
+ */
+static int __init init_root_keyring(void)
+{
+       return install_user_keyrings();
+}
+
+late_initcall(init_root_keyring);
index c411f9bb156b205751ae06983e85f547119a245f..381411941cc1abbc48a699b1c2c38042f1a05711 100644 (file)
@@ -345,33 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
  * May return a key that's already under construction instead if there was a
  * race between two thread calling request_key().
  */
-static int construct_alloc_key(struct key_type *type,
-                              const char *description,
+static int construct_alloc_key(struct keyring_search_context *ctx,
                               struct key *dest_keyring,
                               unsigned long flags,
                               struct key_user *user,
                               struct key **_key)
 {
-       const struct cred *cred = current_cred();
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
        struct key *key;
        key_perm_t perm;
        key_ref_t key_ref;
        int ret;
 
-       kenter("%s,%s,,,", type->name, description);
+       kenter("%s,%s,,,",
+              ctx->index_key.type->name, ctx->index_key.description);
 
        *_key = NULL;
        mutex_lock(&user->cons_lock);
 
        perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
        perm |= KEY_USR_VIEW;
-       if (type->read)
+       if (ctx->index_key.type->read)
                perm |= KEY_POS_READ;
-       if (type == &key_type_keyring || type->update)
+       if (ctx->index_key.type == &key_type_keyring ||
+           ctx->index_key.type->update)
                perm |= KEY_POS_WRITE;
 
-       key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
+       key = key_alloc(ctx->index_key.type, ctx->index_key.description,
+                       ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
                        perm, flags);
        if (IS_ERR(key))
                goto alloc_failed;
@@ -379,8 +380,7 @@ static int construct_alloc_key(struct key_type *type,
        set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
 
        if (dest_keyring) {
-               ret = __key_link_begin(dest_keyring, type, description,
-                                      &prealloc);
+               ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
                if (ret < 0)
                        goto link_prealloc_failed;
        }
@@ -390,16 +390,16 @@ static int construct_alloc_key(struct key_type *type,
         * waited for locks */
        mutex_lock(&key_construction_mutex);
 
-       key_ref = search_process_keyrings(type, description, type->match, cred);
+       key_ref = search_process_keyrings(ctx);
        if (!IS_ERR(key_ref))
                goto key_already_present;
 
        if (dest_keyring)
-               __key_link(dest_keyring, key, &prealloc);
+               __key_link(key, &edit);
 
        mutex_unlock(&key_construction_mutex);
        if (dest_keyring)
-               __key_link_end(dest_keyring, type, prealloc);
+               __key_link_end(dest_keyring, &ctx->index_key, edit);
        mutex_unlock(&user->cons_lock);
        *_key = key;
        kleave(" = 0 [%d]", key_serial(key));
@@ -414,8 +414,8 @@ key_already_present:
        if (dest_keyring) {
                ret = __key_link_check_live_key(dest_keyring, key);
                if (ret == 0)
-                       __key_link(dest_keyring, key, &prealloc);
-               __key_link_end(dest_keyring, type, prealloc);
+                       __key_link(key, &edit);
+               __key_link_end(dest_keyring, &ctx->index_key, edit);
                if (ret < 0)
                        goto link_check_failed;
        }
@@ -444,8 +444,7 @@ alloc_failed:
 /*
  * Commence key construction.
  */
-static struct key *construct_key_and_link(struct key_type *type,
-                                         const char *description,
+static struct key *construct_key_and_link(struct keyring_search_context *ctx,
                                          const char *callout_info,
                                          size_t callout_len,
                                          void *aux,
@@ -464,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type,
 
        construct_get_dest_keyring(&dest_keyring);
 
-       ret = construct_alloc_key(type, description, dest_keyring, flags, user,
-                                 &key);
+       ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
        key_user_put(user);
 
        if (ret == 0) {
@@ -529,17 +527,24 @@ struct key *request_key_and_link(struct key_type *type,
                                 struct key *dest_keyring,
                                 unsigned long flags)
 {
-       const struct cred *cred = current_cred();
+       struct keyring_search_context ctx = {
+               .index_key.type         = type,
+               .index_key.description  = description,
+               .cred                   = current_cred(),
+               .match                  = type->match,
+               .match_data             = description,
+               .flags                  = KEYRING_SEARCH_LOOKUP_DIRECT,
+       };
        struct key *key;
        key_ref_t key_ref;
        int ret;
 
        kenter("%s,%s,%p,%zu,%p,%p,%lx",
-              type->name, description, callout_info, callout_len, aux,
-              dest_keyring, flags);
+              ctx.index_key.type->name, ctx.index_key.description,
+              callout_info, callout_len, aux, dest_keyring, flags);
 
        /* search all the process keyrings for a key */
-       key_ref = search_process_keyrings(type, description, type->match, cred);
+       key_ref = search_process_keyrings(&ctx);
 
        if (!IS_ERR(key_ref)) {
                key = key_ref_to_ptr(key_ref);
@@ -562,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type,
                if (!callout_info)
                        goto error;
 
-               key = construct_key_and_link(type, description, callout_info,
-                                            callout_len, aux, dest_keyring,
-                                            flags);
+               key = construct_key_and_link(&ctx, callout_info, callout_len,
+                                            aux, dest_keyring, flags);
        }
 
 error:
@@ -592,8 +596,10 @@ int wait_for_key_construction(struct key *key, bool intr)
                          intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
        if (ret < 0)
                return ret;
-       if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+       if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
+               smp_rmb();
                return key->type_data.reject_error;
+       }
        return key_validate(key);
 }
 EXPORT_SYMBOL(wait_for_key_construction);
index 85730d5a5a59a05c852b3d22c586778b117589fa..7495a93b4b9024dad78d526d17feb7d07f93016f 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <asm/uaccess.h>
 #include "internal.h"
+#include <keys/user-type.h>
 
 static int request_key_auth_instantiate(struct key *,
                                        struct key_preparsed_payload *);
@@ -221,33 +222,27 @@ error_alloc:
        return ERR_PTR(ret);
 }
 
-/*
- * See if an authorisation key is associated with a particular key.
- */
-static int key_get_instantiation_authkey_match(const struct key *key,
-                                              const void *_id)
-{
-       struct request_key_auth *rka = key->payload.data;
-       key_serial_t id = (key_serial_t)(unsigned long) _id;
-
-       return rka->target_key->serial == id;
-}
-
 /*
  * Search the current process's keyrings for the authorisation key for
  * instantiation of a key.
  */
 struct key *key_get_instantiation_authkey(key_serial_t target_id)
 {
-       const struct cred *cred = current_cred();
+       char description[16];
+       struct keyring_search_context ctx = {
+               .index_key.type         = &key_type_request_key_auth,
+               .index_key.description  = description,
+               .cred                   = current_cred(),
+               .match                  = user_match,
+               .match_data             = description,
+               .flags                  = KEYRING_SEARCH_LOOKUP_DIRECT,
+       };
        struct key *authkey;
        key_ref_t authkey_ref;
 
-       authkey_ref = search_process_keyrings(
-               &key_type_request_key_auth,
-               (void *) (unsigned long) target_id,
-               key_get_instantiation_authkey_match,
-               cred);
+       sprintf(description, "%x", target_id);
+
+       authkey_ref = search_process_keyrings(&ctx);
 
        if (IS_ERR(authkey_ref)) {
                authkey = ERR_CAST(authkey_ref);
index ee32d181764ab876fa2c6b0470c4a65f937cd031..8c0af08760c809b2923d04c5cc3b114c75e27b27 100644 (file)
@@ -61,5 +61,16 @@ ctl_table key_sysctls[] = {
                .extra1 = (void *) &zero,
                .extra2 = (void *) &max,
        },
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       {
+               .procname = "persistent_keyring_expiry",
+               .data = &persistent_keyring_expiry,
+               .maxlen = sizeof(unsigned),
+               .mode = 0644,
+               .proc_handler = proc_dointvec_minmax,
+               .extra1 = (void *) &zero,
+               .extra2 = (void *) &max,
+       },
+#endif
        { }
 };
index 55dc88939185812f70145427b96c991ed6d636e2..faa2caeb593f8524a059e79d58e09bf430a1f992 100644 (file)
@@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc);
  * arbitrary blob of data as the payload
  */
 struct key_type key_type_user = {
-       .name           = "user",
-       .instantiate    = user_instantiate,
-       .update         = user_update,
-       .match          = user_match,
-       .revoke         = user_revoke,
-       .destroy        = user_destroy,
-       .describe       = user_describe,
-       .read           = user_read,
+       .name                   = "user",
+       .def_lookup_type        = KEYRING_SEARCH_LOOKUP_DIRECT,
+       .instantiate            = user_instantiate,
+       .update                 = user_update,
+       .match                  = user_match,
+       .revoke                 = user_revoke,
+       .destroy                = user_destroy,
+       .describe               = user_describe,
+       .read                   = user_read,
 };
 
 EXPORT_SYMBOL_GPL(key_type_user);
@@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user);
  */
 struct key_type key_type_logon = {
        .name                   = "logon",
+       .def_lookup_type        = KEYRING_SEARCH_LOOKUP_DIRECT,
        .instantiate            = user_instantiate,
        .update                 = user_update,
        .match                  = user_match,
index 234bc2ab450c61b42b1db2b53f631ab72bc48a39..9a62045e6282467493567a52f546d1e8d269bcd6 100644 (file)
@@ -397,7 +397,8 @@ void common_lsm_audit(struct common_audit_data *a,
        if (a == NULL)
                return;
        /* we use GFP_ATOMIC so we won't sleep */
-       ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
+       ab = audit_log_start(current->audit_context, GFP_ATOMIC | __GFP_NOWARN,
+                            AUDIT_AVC);
 
        if (ab == NULL)
                return;
index 4dc31f4f2700626cb951aed5e874f0ce18d8b064..15b6928592ef68aac565e3fc94daf4737b6adc54 100644 (file)
@@ -1340,22 +1340,17 @@ int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
        return security_ops->xfrm_policy_delete_security(ctx);
 }
 
-int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+int security_xfrm_state_alloc(struct xfrm_state *x,
+                             struct xfrm_user_sec_ctx *sec_ctx)
 {
-       return security_ops->xfrm_state_alloc_security(x, sec_ctx, 0);
+       return security_ops->xfrm_state_alloc(x, sec_ctx);
 }
 EXPORT_SYMBOL(security_xfrm_state_alloc);
 
 int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
                                      struct xfrm_sec_ctx *polsec, u32 secid)
 {
-       if (!polsec)
-               return 0;
-       /*
-        * We want the context to be taken from secid which is usually
-        * from the sock.
-        */
-       return security_ops->xfrm_state_alloc_security(x, NULL, secid);
+       return security_ops->xfrm_state_alloc_acquire(x, polsec, secid);
 }
 
 int security_xfrm_state_delete(struct xfrm_state *x)
index c540795fb3f2647619cb4705281872e93592e21e..794c3ca49eac92998caa17be71a4bdc472c2e9c8 100644 (file)
@@ -95,7 +95,9 @@
 #include "audit.h"
 #include "avc_ss.h"
 
-#define NUM_SEL_MNT_OPTS 5
+#define SB_TYPE_FMT "%s%s%s"
+#define SB_SUBTYPE(sb) (sb->s_subtype && sb->s_subtype[0])
+#define SB_TYPE_ARGS(sb) sb->s_type->name, SB_SUBTYPE(sb) ? "." : "", SB_SUBTYPE(sb) ? sb->s_subtype : ""
 
 extern struct security_operations *security_ops;
 
@@ -139,12 +141,28 @@ static struct kmem_cache *sel_inode_cache;
  * This function checks the SECMARK reference counter to see if any SECMARK
  * targets are currently configured, if the reference counter is greater than
  * zero SECMARK is considered to be enabled.  Returns true (1) if SECMARK is
- * enabled, false (0) if SECMARK is disabled.
+ * enabled, false (0) if SECMARK is disabled.  If the always_check_network
+ * policy capability is enabled, SECMARK is always considered enabled.
  *
  */
 static int selinux_secmark_enabled(void)
 {
-       return (atomic_read(&selinux_secmark_refcount) > 0);
+       return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount));
+}
+
+/**
+ * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled
+ *
+ * Description:
+ * This function checks if NetLabel or labeled IPSEC is enabled.  Returns true
+ * (1) if any are enabled or false (0) if neither are enabled.  If the
+ * always_check_network policy capability is enabled, peer labeling
+ * is always considered enabled.
+ *
+ */
+static int selinux_peerlbl_enabled(void)
+{
+       return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled());
 }
 
 /*
@@ -309,8 +327,11 @@ enum {
        Opt_defcontext = 3,
        Opt_rootcontext = 4,
        Opt_labelsupport = 5,
+       Opt_nextmntopt = 6,
 };
 
+#define NUM_SEL_MNT_OPTS       (Opt_nextmntopt - 1)
+
 static const match_table_t tokens = {
        {Opt_context, CONTEXT_STR "%s"},
        {Opt_fscontext, FSCONTEXT_STR "%s"},
@@ -355,6 +376,29 @@ static int may_context_mount_inode_relabel(u32 sid,
        return rc;
 }
 
+static int selinux_is_sblabel_mnt(struct super_block *sb)
+{
+       struct superblock_security_struct *sbsec = sb->s_security;
+
+       if (sbsec->behavior == SECURITY_FS_USE_XATTR ||
+           sbsec->behavior == SECURITY_FS_USE_TRANS ||
+           sbsec->behavior == SECURITY_FS_USE_TASK)
+               return 1;
+
+       /* Special handling for sysfs. Is genfs but also has setxattr handler*/
+       if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
+               return 1;
+
+       /*
+        * Special handling for rootfs. Is genfs but supports
+        * setting SELinux context on in-core inodes.
+        */
+       if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0)
+               return 1;
+
+       return 0;
+}
+
 static int sb_finish_set_opts(struct super_block *sb)
 {
        struct superblock_security_struct *sbsec = sb->s_security;
@@ -369,8 +413,8 @@ static int sb_finish_set_opts(struct super_block *sb)
                   the first boot of the SELinux kernel before we have
                   assigned xattr values to the filesystem. */
                if (!root_inode->i_op->getxattr) {
-                       printk(KERN_WARNING "SELinux: (dev %s, type %s) has no "
-                              "xattr support\n", sb->s_id, sb->s_type->name);
+                       printk(KERN_WARNING "SELinux: (dev %s, type "SB_TYPE_FMT") has no "
+                              "xattr support\n", sb->s_id, SB_TYPE_ARGS(sb));
                        rc = -EOPNOTSUPP;
                        goto out;
                }
@@ -378,35 +422,27 @@ static int sb_finish_set_opts(struct super_block *sb)
                if (rc < 0 && rc != -ENODATA) {
                        if (rc == -EOPNOTSUPP)
                                printk(KERN_WARNING "SELinux: (dev %s, type "
-                                      "%s) has no security xattr handler\n",
-                                      sb->s_id, sb->s_type->name);
+                                      SB_TYPE_FMT") has no security xattr handler\n",
+                                      sb->s_id, SB_TYPE_ARGS(sb));
                        else
                                printk(KERN_WARNING "SELinux: (dev %s, type "
-                                      "%s) getxattr errno %d\n", sb->s_id,
-                                      sb->s_type->name, -rc);
+                                      SB_TYPE_FMT") getxattr errno %d\n", sb->s_id,
+                                      SB_TYPE_ARGS(sb), -rc);
                        goto out;
                }
        }
 
-       sbsec->flags |= (SE_SBINITIALIZED | SE_SBLABELSUPP);
-
        if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
-               printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n",
-                      sb->s_id, sb->s_type->name);
+               printk(KERN_ERR "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), unknown behavior\n",
+                      sb->s_id, SB_TYPE_ARGS(sb));
        else
-               printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n",
-                      sb->s_id, sb->s_type->name,
+               printk(KERN_DEBUG "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), %s\n",
+                      sb->s_id, SB_TYPE_ARGS(sb),
                       labeling_behaviors[sbsec->behavior-1]);
 
-       if (sbsec->behavior == SECURITY_FS_USE_GENFS ||
-           sbsec->behavior == SECURITY_FS_USE_MNTPOINT ||
-           sbsec->behavior == SECURITY_FS_USE_NONE ||
-           sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
-               sbsec->flags &= ~SE_SBLABELSUPP;
-
-       /* Special handling for sysfs. Is genfs but also has setxattr handler*/
-       if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
-               sbsec->flags |= SE_SBLABELSUPP;
+       sbsec->flags |= SE_SBINITIALIZED;
+       if (selinux_is_sblabel_mnt(sb))
+               sbsec->flags |= SBLABEL_MNT;
 
        /* Initialize the root inode. */
        rc = inode_doinit_with_dentry(root_inode, root);
@@ -460,15 +496,18 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
        if (!ss_initialized)
                return -EINVAL;
 
+       /* make sure we always check enough bits to cover the mask */
+       BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS));
+
        tmp = sbsec->flags & SE_MNTMASK;
        /* count the number of mount options for this sb */
-       for (i = 0; i < 8; i++) {
+       for (i = 0; i < NUM_SEL_MNT_OPTS; i++) {
                if (tmp & 0x01)
                        opts->num_mnt_opts++;
                tmp >>= 1;
        }
        /* Check if the Label support flag is set */
-       if (sbsec->flags & SE_SBLABELSUPP)
+       if (sbsec->flags & SBLABEL_MNT)
                opts->num_mnt_opts++;
 
        opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC);
@@ -515,9 +554,9 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
                opts->mnt_opts[i] = context;
                opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT;
        }
-       if (sbsec->flags & SE_SBLABELSUPP) {
+       if (sbsec->flags & SBLABEL_MNT) {
                opts->mnt_opts[i] = NULL;
-               opts->mnt_opts_flags[i++] = SE_SBLABELSUPP;
+               opts->mnt_opts_flags[i++] = SBLABEL_MNT;
        }
 
        BUG_ON(i != opts->num_mnt_opts);
@@ -561,7 +600,6 @@ static int selinux_set_mnt_opts(struct super_block *sb,
        const struct cred *cred = current_cred();
        int rc = 0, i;
        struct superblock_security_struct *sbsec = sb->s_security;
-       const char *name = sb->s_type->name;
        struct inode *inode = sbsec->sb->s_root->d_inode;
        struct inode_security_struct *root_isec = inode->i_security;
        u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
@@ -614,14 +652,14 @@ static int selinux_set_mnt_opts(struct super_block *sb,
        for (i = 0; i < num_opts; i++) {
                u32 sid;
 
-               if (flags[i] == SE_SBLABELSUPP)
+               if (flags[i] == SBLABEL_MNT)
                        continue;
                rc = security_context_to_sid(mount_options[i],
                                             strlen(mount_options[i]), &sid);
                if (rc) {
                        printk(KERN_WARNING "SELinux: security_context_to_sid"
-                              "(%s) failed for (dev %s, type %s) errno=%d\n",
-                              mount_options[i], sb->s_id, name, rc);
+                              "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n",
+                              mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc);
                        goto out;
                }
                switch (flags[i]) {
@@ -685,9 +723,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
                 * Determine the labeling behavior to use for this
                 * filesystem type.
                 */
-               rc = security_fs_use((sbsec->flags & SE_SBPROC) ?
-                                       "proc" : sb->s_type->name,
-                                       &sbsec->behavior, &sbsec->sid);
+               rc = security_fs_use(sb);
                if (rc) {
                        printk(KERN_WARNING
                                "%s: security_fs_use(%s) returned %d\n",
@@ -770,7 +806,8 @@ out:
 out_double_mount:
        rc = -EINVAL;
        printk(KERN_WARNING "SELinux: mount invalid.  Same superblock, different "
-              "security settings for (dev %s, type %s)\n", sb->s_id, name);
+              "security settings for (dev %s, type "SB_TYPE_FMT")\n", sb->s_id,
+              SB_TYPE_ARGS(sb));
        goto out;
 }
 
@@ -1037,7 +1074,7 @@ static void selinux_write_opts(struct seq_file *m,
                case DEFCONTEXT_MNT:
                        prefix = DEFCONTEXT_STR;
                        break;
-               case SE_SBLABELSUPP:
+               case SBLABEL_MNT:
                        seq_putc(m, ',');
                        seq_puts(m, LABELSUPP_STR);
                        continue;
@@ -1649,7 +1686,7 @@ static int may_create(struct inode *dir,
        if (rc)
                return rc;
 
-       if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
+       if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
                rc = security_transition_sid(sid, dsec->sid, tclass,
                                             &dentry->d_name, &newsid);
                if (rc)
@@ -2437,14 +2474,14 @@ static int selinux_sb_remount(struct super_block *sb, void *data)
                u32 sid;
                size_t len;
 
-               if (flags[i] == SE_SBLABELSUPP)
+               if (flags[i] == SBLABEL_MNT)
                        continue;
                len = strlen(mount_options[i]);
                rc = security_context_to_sid(mount_options[i], len, &sid);
                if (rc) {
                        printk(KERN_WARNING "SELinux: security_context_to_sid"
-                              "(%s) failed for (dev %s, type %s) errno=%d\n",
-                              mount_options[i], sb->s_id, sb->s_type->name, rc);
+                              "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n",
+                              mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc);
                        goto out_free_opts;
                }
                rc = -EINVAL;
@@ -2482,8 +2519,8 @@ out_free_secdata:
        return rc;
 out_bad_option:
        printk(KERN_WARNING "SELinux: unable to change security options "
-              "during remount (dev %s, type=%s)\n", sb->s_id,
-              sb->s_type->name);
+              "during remount (dev %s, type "SB_TYPE_FMT")\n", sb->s_id,
+              SB_TYPE_ARGS(sb));
        goto out_free_opts;
 }
 
@@ -2606,7 +2643,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
        if ((sbsec->flags & SE_SBINITIALIZED) &&
            (sbsec->behavior == SECURITY_FS_USE_MNTPOINT))
                newsid = sbsec->mntpoint_sid;
-       else if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
+       else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
                rc = security_transition_sid(sid, dsec->sid,
                                             inode_mode_to_security_class(inode->i_mode),
                                             qstr, &newsid);
@@ -2628,7 +2665,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
                isec->initialized = 1;
        }
 
-       if (!ss_initialized || !(sbsec->flags & SE_SBLABELSUPP))
+       if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT))
                return -EOPNOTSUPP;
 
        if (name)
@@ -2830,7 +2867,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
                return selinux_inode_setotherxattr(dentry, name);
 
        sbsec = inode->i_sb->s_security;
-       if (!(sbsec->flags & SE_SBLABELSUPP))
+       if (!(sbsec->flags & SBLABEL_MNT))
                return -EOPNOTSUPP;
 
        if (!inode_owner_or_capable(inode))
@@ -3791,8 +3828,12 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid)
        u32 nlbl_sid;
        u32 nlbl_type;
 
-       selinux_skb_xfrm_sid(skb, &xfrm_sid);
-       selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid);
+       err = selinux_skb_xfrm_sid(skb, &xfrm_sid);
+       if (unlikely(err))
+               return -EACCES;
+       err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid);
+       if (unlikely(err))
+               return -EACCES;
 
        err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid);
        if (unlikely(err)) {
@@ -4246,7 +4287,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
                return selinux_sock_rcv_skb_compat(sk, skb, family);
 
        secmark_active = selinux_secmark_enabled();
-       peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+       peerlbl_active = selinux_peerlbl_enabled();
        if (!secmark_active && !peerlbl_active)
                return 0;
 
@@ -4628,7 +4669,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 
        secmark_active = selinux_secmark_enabled();
        netlbl_active = netlbl_enabled();
-       peerlbl_active = netlbl_active || selinux_xfrm_enabled();
+       peerlbl_active = selinux_peerlbl_enabled();
        if (!secmark_active && !peerlbl_active)
                return NF_ACCEPT;
 
@@ -4780,7 +4821,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
                return NF_ACCEPT;
 #endif
        secmark_active = selinux_secmark_enabled();
-       peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+       peerlbl_active = selinux_peerlbl_enabled();
        if (!secmark_active && !peerlbl_active)
                return NF_ACCEPT;
 
@@ -5784,7 +5825,8 @@ static struct security_operations selinux_ops = {
        .xfrm_policy_clone_security =   selinux_xfrm_policy_clone,
        .xfrm_policy_free_security =    selinux_xfrm_policy_free,
        .xfrm_policy_delete_security =  selinux_xfrm_policy_delete,
-       .xfrm_state_alloc_security =    selinux_xfrm_state_alloc,
+       .xfrm_state_alloc =             selinux_xfrm_state_alloc,
+       .xfrm_state_alloc_acquire =     selinux_xfrm_state_alloc_acquire,
        .xfrm_state_free_security =     selinux_xfrm_state_free,
        .xfrm_state_delete_security =   selinux_xfrm_state_delete,
        .xfrm_policy_lookup =           selinux_xfrm_policy_lookup,
index aa47bcabb5f65e728aadbaa39cdecfa55d20aa16..b1dfe104945078ead53647c247c46aa6134fac2e 100644 (file)
@@ -58,8 +58,8 @@ struct superblock_security_struct {
        u32 sid;                        /* SID of file system superblock */
        u32 def_sid;                    /* default SID for labeling */
        u32 mntpoint_sid;               /* SECURITY_FS_USE_MNTPOINT context for files */
-       unsigned int behavior;          /* labeling behavior */
-       unsigned char flags;            /* which mount options were specified */
+       unsigned short behavior;        /* labeling behavior */
+       unsigned short flags;           /* which mount options were specified */
        struct mutex lock;
        struct list_head isec_head;
        spinlock_t isec_lock;
index 8fd8e18ea34019c863d91ba88268b8c4018f3410..fe341ae370049b39ac2012d665a64dd4dc9af198 100644 (file)
 /* Mask for just the mount related flags */
 #define SE_MNTMASK     0x0f
 /* Super block security struct flags for mount options */
+/* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */
 #define CONTEXT_MNT    0x01
 #define FSCONTEXT_MNT  0x02
 #define ROOTCONTEXT_MNT        0x04
 #define DEFCONTEXT_MNT 0x08
+#define SBLABEL_MNT    0x10
 /* Non-mount related flags */
-#define SE_SBINITIALIZED       0x10
-#define SE_SBPROC              0x20
-#define SE_SBLABELSUPP 0x40
+#define SE_SBINITIALIZED       0x0100
+#define SE_SBPROC              0x0200
 
 #define CONTEXT_STR    "context="
 #define FSCONTEXT_STR  "fscontext="
@@ -68,12 +69,15 @@ extern int selinux_enabled;
 enum {
        POLICYDB_CAPABILITY_NETPEER,
        POLICYDB_CAPABILITY_OPENPERM,
+       POLICYDB_CAPABILITY_REDHAT1,
+       POLICYDB_CAPABILITY_ALWAYSNETWORK,
        __POLICYDB_CAPABILITY_MAX
 };
 #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
 
 extern int selinux_policycap_netpeer;
 extern int selinux_policycap_openperm;
+extern int selinux_policycap_alwaysnetwork;
 
 /*
  * type_datum properties
@@ -172,8 +176,7 @@ int security_get_allow_unknown(void);
 #define SECURITY_FS_USE_NATIVE         7 /* use native label support */
 #define SECURITY_FS_USE_MAX            7 /* Highest SECURITY_FS_USE_XXX */
 
-int security_fs_use(const char *fstype, unsigned int *behavior,
-       u32 *sid);
+int security_fs_use(struct super_block *sb);
 
 int security_genfs_sid(const char *fstype, char *name, u16 sclass,
        u32 *sid);
index 6713f04e30ba8810415f88f7ed6e78cb5685f6f4..0dec76c64cf53853d0eea6aac983db307c8636b8 100644 (file)
 #include <net/flow.h>
 
 int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
-                             struct xfrm_user_sec_ctx *sec_ctx);
+                             struct xfrm_user_sec_ctx *uctx);
 int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
                              struct xfrm_sec_ctx **new_ctxp);
 void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx);
 int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx);
 int selinux_xfrm_state_alloc(struct xfrm_state *x,
-       struct xfrm_user_sec_ctx *sec_ctx, u32 secid);
+                            struct xfrm_user_sec_ctx *uctx);
+int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
+                                    struct xfrm_sec_ctx *polsec, u32 secid);
 void selinux_xfrm_state_free(struct xfrm_state *x);
 int selinux_xfrm_state_delete(struct xfrm_state *x);
 int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
 int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
-                       struct xfrm_policy *xp, const struct flowi *fl);
-
-/*
- * Extract the security blob from the sock (it's actually on the socket)
- */
-static inline struct inode_security_struct *get_sock_isec(struct sock *sk)
-{
-       if (!sk->sk_socket)
-               return NULL;
-
-       return SOCK_INODE(sk->sk_socket)->i_security;
-}
+                                     struct xfrm_policy *xp,
+                                     const struct flowi *fl);
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 extern atomic_t selinux_xfrm_refcount;
@@ -42,10 +34,10 @@ static inline int selinux_xfrm_enabled(void)
        return (atomic_read(&selinux_xfrm_refcount) > 0);
 }
 
-int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
-                       struct common_audit_data *ad);
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                       struct common_audit_data *ad, u8 proto);
+int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+                             struct common_audit_data *ad);
+int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+                               struct common_audit_data *ad, u8 proto);
 int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
 
 static inline void selinux_xfrm_notify_policyload(void)
@@ -64,19 +56,21 @@ static inline int selinux_xfrm_enabled(void)
        return 0;
 }
 
-static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-                       struct common_audit_data *ad)
+static inline int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+                                           struct common_audit_data *ad)
 {
        return 0;
 }
 
-static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                       struct common_audit_data *ad, u8 proto)
+static inline int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+                                             struct common_audit_data *ad,
+                                             u8 proto)
 {
        return 0;
 }
 
-static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
+static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid,
+                                             int ckall)
 {
        *sid = SECSID_NULL;
        return 0;
@@ -87,10 +81,9 @@ static inline void selinux_xfrm_notify_policyload(void)
 }
 #endif
 
-static inline void selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid)
+static inline int selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid)
 {
-       int err = selinux_xfrm_decode_session(skb, sid, 0);
-       BUG_ON(err);
+       return selinux_xfrm_decode_session(skb, sid, 0);
 }
 
 #endif /* _SELINUX_XFRM_H_ */
index da4b8b2332802c9624f2f7f49ea8d622f96e180a..6235d052338b2e63b838711ed09c7ba1b04c67c6 100644 (file)
@@ -442,8 +442,7 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
            sksec->nlbl_state != NLBL_CONNLABELED)
                return 0;
 
-       local_bh_disable();
-       bh_lock_sock_nested(sk);
+       lock_sock(sk);
 
        /* connected sockets are allowed to disconnect when the address family
         * is set to AF_UNSPEC, if that is what is happening we want to reset
@@ -464,7 +463,6 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
                sksec->nlbl_state = NLBL_CONNLABELED;
 
 socket_connect_return:
-       bh_unlock_sock(sk);
-       local_bh_enable();
+       release_sock(sk);
        return rc;
 }
index c5454c0477c346e4d814f5ff209feba86e5b86ad..03a72c32afd738ccad5c188bbe853202c32f53f6 100644 (file)
@@ -166,6 +166,7 @@ static void sel_netnode_insert(struct sel_netnode *node)
                break;
        default:
                BUG();
+               return;
        }
 
        /* we need to impose a limit on the growth of the hash table so check
@@ -225,6 +226,7 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid)
                break;
        default:
                BUG();
+               ret = -EINVAL;
        }
        if (ret != 0)
                goto out;
index 855e464e92efb9916535957ed53a3c9df2c1a33f..332ac8a80cf5b62c77bff350f6a92698d76a8e0f 100644 (file)
@@ -116,6 +116,8 @@ static struct nlmsg_perm nlmsg_audit_perms[] =
        { AUDIT_MAKE_EQUIV,     NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
        { AUDIT_TTY_GET,        NETLINK_AUDIT_SOCKET__NLMSG_READ     },
        { AUDIT_TTY_SET,        NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT   },
+       { AUDIT_GET_FEATURE,    NETLINK_AUDIT_SOCKET__NLMSG_READ     },
+       { AUDIT_SET_FEATURE,    NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
 };
 
 
index ff427733c2903cab275a05da0887478850e1e374..5122affe06a8840e193150d62bd9b2f996fe67fe 100644 (file)
@@ -44,7 +44,9 @@
 /* Policy capability filenames */
 static char *policycap_names[] = {
        "network_peer_controls",
-       "open_perms"
+       "open_perms",
+       "redhat1",
+       "always_check_network"
 };
 
 unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE;
index 30f119b1d1ec36a95dc456c52b5b0ac1a6868514..820313a04d49bf4c4a8bc0f04ea01514ff184a64 100644 (file)
@@ -213,7 +213,12 @@ netlbl_import_failure:
 }
 #endif /* CONFIG_NETLABEL */
 
-int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
+/*
+ * Check to see if all the bits set in e2 are also set in e1. Optionally,
+ * if last_e2bit is non-zero, the highest set bit in e2 cannot exceed
+ * last_e2bit.
+ */
+int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit)
 {
        struct ebitmap_node *n1, *n2;
        int i;
@@ -223,14 +228,25 @@ int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
 
        n1 = e1->node;
        n2 = e2->node;
+
        while (n1 && n2 && (n1->startbit <= n2->startbit)) {
                if (n1->startbit < n2->startbit) {
                        n1 = n1->next;
                        continue;
                }
-               for (i = 0; i < EBITMAP_UNIT_NUMS; i++) {
+               for (i = EBITMAP_UNIT_NUMS - 1; (i >= 0) && !n2->maps[i]; )
+                       i--;    /* Skip trailing NULL map entries */
+               if (last_e2bit && (i >= 0)) {
+                       u32 lastsetbit = n2->startbit + i * EBITMAP_UNIT_SIZE +
+                                        __fls(n2->maps[i]);
+                       if (lastsetbit > last_e2bit)
+                               return 0;
+               }
+
+               while (i >= 0) {
                        if ((n1->maps[i] & n2->maps[i]) != n2->maps[i])
                                return 0;
+                       i--;
                }
 
                n1 = n1->next;
index 922f8afa89dd5837e2617daaf793db0e40ad009e..712c8a7b8e8b879d3835b5ee3650b66baa46e106 100644 (file)
 
 #include <net/netlabel.h>
 
-#define EBITMAP_UNIT_NUMS      ((32 - sizeof(void *) - sizeof(u32))    \
+#ifdef CONFIG_64BIT
+#define        EBITMAP_NODE_SIZE       64
+#else
+#define        EBITMAP_NODE_SIZE       32
+#endif
+
+#define EBITMAP_UNIT_NUMS      ((EBITMAP_NODE_SIZE-sizeof(void *)-sizeof(u32))\
                                        / sizeof(unsigned long))
 #define EBITMAP_UNIT_SIZE      BITS_PER_LONG
 #define EBITMAP_SIZE           (EBITMAP_UNIT_NUMS * EBITMAP_UNIT_SIZE)
@@ -117,7 +123,7 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n,
 
 int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2);
 int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src);
-int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2);
+int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit);
 int ebitmap_get_bit(struct ebitmap *e, unsigned long bit);
 int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value);
 void ebitmap_destroy(struct ebitmap *e);
index 40de8d3f208ecf95db162f4ae355d0d53ba99265..c85bc1ec040c0c58f93772004361cbcd04861575 100644 (file)
@@ -160,8 +160,6 @@ void mls_sid_to_context(struct context *context,
 int mls_level_isvalid(struct policydb *p, struct mls_level *l)
 {
        struct level_datum *levdatum;
-       struct ebitmap_node *node;
-       int i;
 
        if (!l->sens || l->sens > p->p_levels.nprim)
                return 0;
@@ -170,19 +168,13 @@ int mls_level_isvalid(struct policydb *p, struct mls_level *l)
        if (!levdatum)
                return 0;
 
-       ebitmap_for_each_positive_bit(&l->cat, node, i) {
-               if (i > p->p_cats.nprim)
-                       return 0;
-               if (!ebitmap_get_bit(&levdatum->level->cat, i)) {
-                       /*
-                        * Category may not be associated with
-                        * sensitivity.
-                        */
-                       return 0;
-               }
-       }
-
-       return 1;
+       /*
+        * Return 1 iff all the bits set in l->cat are also set in
+        * levdatum->level->cat and no bit in l->cat is larger than
+        * p->p_cats.nprim.
+        */
+       return ebitmap_contains(&levdatum->level->cat, &l->cat,
+                               p->p_cats.nprim);
 }
 
 int mls_range_isvalid(struct policydb *p, struct mls_range *r)
index 03bed52a80526abfbda766a33859595cc1d8bfa5..e93648774137c601f5ec90ce14a03983655ce36d 100644 (file)
@@ -35,7 +35,7 @@ static inline int mls_level_eq(struct mls_level *l1, struct mls_level *l2)
 static inline int mls_level_dom(struct mls_level *l1, struct mls_level *l2)
 {
        return ((l1->sens >= l2->sens) &&
-               ebitmap_contains(&l1->cat, &l2->cat));
+               ebitmap_contains(&l1->cat, &l2->cat, 0));
 }
 
 #define mls_level_incomp(l1, l2) \
index c8adde3aff8fdbe93fb2f867e55f71b9879685a5..f6195ebde3c94eef0cdf1cf92933246069b25059 100644 (file)
@@ -3203,9 +3203,8 @@ static int range_write_helper(void *key, void *data, void *ptr)
 
 static int range_write(struct policydb *p, void *fp)
 {
-       size_t nel;
        __le32 buf[1];
-       int rc;
+       int rc, nel;
        struct policy_data pd;
 
        pd.p = p;
index b4feecc3fe0110d10bbdc183c369a03ab8495a6c..ee470a0b5c27fdad95a59b258792b6182435b999 100644 (file)
@@ -72,6 +72,7 @@
 
 int selinux_policycap_netpeer;
 int selinux_policycap_openperm;
+int selinux_policycap_alwaysnetwork;
 
 static DEFINE_RWLOCK(policy_rwlock);
 
@@ -1812,6 +1813,8 @@ static void security_load_policycaps(void)
                                                  POLICYDB_CAPABILITY_NETPEER);
        selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps,
                                                  POLICYDB_CAPABILITY_OPENPERM);
+       selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps,
+                                                 POLICYDB_CAPABILITY_ALWAYSNETWORK);
 }
 
 static int security_preserve_bools(struct policydb *p);
@@ -2323,43 +2326,74 @@ out:
 
 /**
  * security_fs_use - Determine how to handle labeling for a filesystem.
- * @fstype: filesystem type
- * @behavior: labeling behavior
- * @sid: SID for filesystem (superblock)
+ * @sb: superblock in question
  */
-int security_fs_use(
-       const char *fstype,
-       unsigned int *behavior,
-       u32 *sid)
+int security_fs_use(struct super_block *sb)
 {
        int rc = 0;
        struct ocontext *c;
+       struct superblock_security_struct *sbsec = sb->s_security;
+       const char *fstype = sb->s_type->name;
+       const char *subtype = (sb->s_subtype && sb->s_subtype[0]) ? sb->s_subtype : NULL;
+       struct ocontext *base = NULL;
 
        read_lock(&policy_rwlock);
 
-       c = policydb.ocontexts[OCON_FSUSE];
-       while (c) {
-               if (strcmp(fstype, c->u.name) == 0)
+       for (c = policydb.ocontexts[OCON_FSUSE]; c; c = c->next) {
+               char *sub;
+               int baselen;
+
+               baselen = strlen(fstype);
+
+               /* if base does not match, this is not the one */
+               if (strncmp(fstype, c->u.name, baselen))
+                       continue;
+
+               /* if there is no subtype, this is the one! */
+               if (!subtype)
+                       break;
+
+               /* skip past the base in this entry */
+               sub = c->u.name + baselen;
+
+               /* entry is only a base. save it. keep looking for subtype */
+               if (sub[0] == '\0') {
+                       base = c;
+                       continue;
+               }
+
+               /* entry is not followed by a subtype, so it is not a match */
+               if (sub[0] != '.')
+                       continue;
+
+               /* whew, we found a subtype of this fstype */
+               sub++; /* move past '.' */
+
+               /* exact match of fstype AND subtype */
+               if (!strcmp(subtype, sub))
                        break;
-               c = c->next;
        }
 
+       /* in case we had found an fstype match but no subtype match */
+       if (!c)
+               c = base;
+
        if (c) {
-               *behavior = c->v.behavior;
+               sbsec->behavior = c->v.behavior;
                if (!c->sid[0]) {
                        rc = sidtab_context_to_sid(&sidtab, &c->context[0],
                                                   &c->sid[0]);
                        if (rc)
                                goto out;
                }
-               *sid = c->sid[0];
+               sbsec->sid = c->sid[0];
        } else {
-               rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid);
+               rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, &sbsec->sid);
                if (rc) {
-                       *behavior = SECURITY_FS_USE_NONE;
+                       sbsec->behavior = SECURITY_FS_USE_NONE;
                        rc = 0;
                } else {
-                       *behavior = SECURITY_FS_USE_GENFS;
+                       sbsec->behavior = SECURITY_FS_USE_GENFS;
                }
        }
 
index d030818862146732ebe30c8cc3f266d485ef0677..a91d205ec0c6094cc9a0fecb5d427d4d24b1ed9a 100644 (file)
@@ -56,7 +56,7 @@
 atomic_t selinux_xfrm_refcount = ATOMIC_INIT(0);
 
 /*
- * Returns true if an LSM/SELinux context
+ * Returns true if the context is an LSM/SELinux context.
  */
 static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx)
 {
@@ -66,7 +66,7 @@ static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx)
 }
 
 /*
- * Returns true if the xfrm contains a security blob for SELinux
+ * Returns true if the xfrm contains a security blob for SELinux.
  */
 static inline int selinux_authorizable_xfrm(struct xfrm_state *x)
 {
@@ -74,48 +74,111 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x)
 }
 
 /*
- * LSM hook implementation that authorizes that a flow can use
- * a xfrm policy rule.
+ * Allocates a xfrm_sec_ctx and populates it using the supplied
+ * xfrm_user_sec_ctx security context.
  */
-int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp,
+                                  struct xfrm_user_sec_ctx *uctx)
 {
        int rc;
-       u32 sel_sid;
+       const struct task_security_struct *tsec = current_security();
+       struct xfrm_sec_ctx *ctx = NULL;
+       u32 str_len;
 
-       /* Context sid is either set to label or ANY_ASSOC */
-       if (ctx) {
-               if (!selinux_authorizable_ctx(ctx))
-                       return -EINVAL;
-
-               sel_sid = ctx->ctx_sid;
-       } else
-               /*
-                * All flows should be treated as polmatch'ing an
-                * otherwise applicable "non-labeled" policy. This
-                * would prevent inadvertent "leaks".
-                */
-               return 0;
+       if (ctxp == NULL || uctx == NULL ||
+           uctx->ctx_doi != XFRM_SC_DOI_LSM ||
+           uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
+               return -EINVAL;
 
-       rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__POLMATCH,
-                         NULL);
+       str_len = uctx->ctx_len;
+       if (str_len >= PAGE_SIZE)
+               return -ENOMEM;
 
-       if (rc == -EACCES)
-               return -ESRCH;
+       ctx = kmalloc(sizeof(*ctx) + str_len + 1, GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
 
+       ctx->ctx_doi = XFRM_SC_DOI_LSM;
+       ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+       ctx->ctx_len = str_len;
+       memcpy(ctx->ctx_str, &uctx[1], str_len);
+       ctx->ctx_str[str_len] = '\0';
+       rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid);
+       if (rc)
+               goto err;
+
+       rc = avc_has_perm(tsec->sid, ctx->ctx_sid,
+                         SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL);
+       if (rc)
+               goto err;
+
+       *ctxp = ctx;
+       atomic_inc(&selinux_xfrm_refcount);
+       return 0;
+
+err:
+       kfree(ctx);
        return rc;
 }
 
+/*
+ * Free the xfrm_sec_ctx structure.
+ */
+static void selinux_xfrm_free(struct xfrm_sec_ctx *ctx)
+{
+       if (!ctx)
+               return;
+
+       atomic_dec(&selinux_xfrm_refcount);
+       kfree(ctx);
+}
+
+/*
+ * Authorize the deletion of a labeled SA or policy rule.
+ */
+static int selinux_xfrm_delete(struct xfrm_sec_ctx *ctx)
+{
+       const struct task_security_struct *tsec = current_security();
+
+       if (!ctx)
+               return 0;
+
+       return avc_has_perm(tsec->sid, ctx->ctx_sid,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
+                           NULL);
+}
+
+/*
+ * LSM hook implementation that authorizes that a flow can use a xfrm policy
+ * rule.
+ */
+int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+{
+       int rc;
+
+       /* All flows should be treated as polmatch'ing an otherwise applicable
+        * "non-labeled" policy. This would prevent inadvertent "leaks". */
+       if (!ctx)
+               return 0;
+
+       /* Context sid is either set to label or ANY_ASSOC */
+       if (!selinux_authorizable_ctx(ctx))
+               return -EINVAL;
+
+       rc = avc_has_perm(fl_secid, ctx->ctx_sid,
+                         SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL);
+       return (rc == -EACCES ? -ESRCH : rc);
+}
+
 /*
  * LSM hook implementation that authorizes that a state matches
  * the given policy, flow combo.
  */
-
-int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp,
-                       const struct flowi *fl)
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
+                                     struct xfrm_policy *xp,
+                                     const struct flowi *fl)
 {
        u32 state_sid;
-       int rc;
 
        if (!xp->security)
                if (x->security)
@@ -138,187 +201,80 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *
        if (fl->flowi_secid != state_sid)
                return 0;
 
-       rc = avc_has_perm(fl->flowi_secid, state_sid, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__SENDTO,
-                         NULL)? 0:1;
-
-       /*
-        * We don't need a separate SA Vs. policy polmatch check
-        * since the SA is now of the same label as the flow and
-        * a flow Vs. policy polmatch check had already happened
-        * in selinux_xfrm_policy_lookup() above.
-        */
-
-       return rc;
+       /* We don't need a separate SA Vs. policy polmatch check since the SA
+        * is now of the same label as the flow and a flow Vs. policy polmatch
+        * check had already happened in selinux_xfrm_policy_lookup() above. */
+       return (avc_has_perm(fl->flowi_secid, state_sid,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO,
+                           NULL) ? 0 : 1);
 }
 
 /*
  * LSM hook implementation that checks and/or returns the xfrm sid for the
  * incoming packet.
  */
-
 int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
 {
+       u32 sid_session = SECSID_NULL;
        struct sec_path *sp;
 
-       *sid = SECSID_NULL;
-
        if (skb == NULL)
-               return 0;
+               goto out;
 
        sp = skb->sp;
        if (sp) {
-               int i, sid_set = 0;
+               int i;
 
-               for (i = sp->len-1; i >= 0; i--) {
+               for (i = sp->len - 1; i >= 0; i--) {
                        struct xfrm_state *x = sp->xvec[i];
                        if (selinux_authorizable_xfrm(x)) {
                                struct xfrm_sec_ctx *ctx = x->security;
 
-                               if (!sid_set) {
-                                       *sid = ctx->ctx_sid;
-                                       sid_set = 1;
-
+                               if (sid_session == SECSID_NULL) {
+                                       sid_session = ctx->ctx_sid;
                                        if (!ckall)
-                                               break;
-                               } else if (*sid != ctx->ctx_sid)
+                                               goto out;
+                               } else if (sid_session != ctx->ctx_sid) {
+                                       *sid = SECSID_NULL;
                                        return -EINVAL;
+                               }
                        }
                }
        }
 
-       return 0;
-}
-
-/*
- * Security blob allocation for xfrm_policy and xfrm_state
- * CTX does not have a meaningful value on input
- */
-static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
-       struct xfrm_user_sec_ctx *uctx, u32 sid)
-{
-       int rc = 0;
-       const struct task_security_struct *tsec = current_security();
-       struct xfrm_sec_ctx *ctx = NULL;
-       char *ctx_str = NULL;
-       u32 str_len;
-
-       BUG_ON(uctx && sid);
-
-       if (!uctx)
-               goto not_from_user;
-
-       if (uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
-               return -EINVAL;
-
-       str_len = uctx->ctx_len;
-       if (str_len >= PAGE_SIZE)
-               return -ENOMEM;
-
-       *ctxp = ctx = kmalloc(sizeof(*ctx) +
-                             str_len + 1,
-                             GFP_KERNEL);
-
-       if (!ctx)
-               return -ENOMEM;
-
-       ctx->ctx_doi = uctx->ctx_doi;
-       ctx->ctx_len = str_len;
-       ctx->ctx_alg = uctx->ctx_alg;
-
-       memcpy(ctx->ctx_str,
-              uctx+1,
-              str_len);
-       ctx->ctx_str[str_len] = 0;
-       rc = security_context_to_sid(ctx->ctx_str,
-                                    str_len,
-                                    &ctx->ctx_sid);
-
-       if (rc)
-               goto out;
-
-       /*
-        * Does the subject have permission to set security context?
-        */
-       rc = avc_has_perm(tsec->sid, ctx->ctx_sid,
-                         SECCLASS_ASSOCIATION,
-                         ASSOCIATION__SETCONTEXT, NULL);
-       if (rc)
-               goto out;
-
-       return rc;
-
-not_from_user:
-       rc = security_sid_to_context(sid, &ctx_str, &str_len);
-       if (rc)
-               goto out;
-
-       *ctxp = ctx = kmalloc(sizeof(*ctx) +
-                             str_len,
-                             GFP_ATOMIC);
-
-       if (!ctx) {
-               rc = -ENOMEM;
-               goto out;
-       }
-
-       ctx->ctx_doi = XFRM_SC_DOI_LSM;
-       ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
-       ctx->ctx_sid = sid;
-       ctx->ctx_len = str_len;
-       memcpy(ctx->ctx_str,
-              ctx_str,
-              str_len);
-
-       goto out2;
-
 out:
-       *ctxp = NULL;
-       kfree(ctx);
-out2:
-       kfree(ctx_str);
-       return rc;
+       *sid = sid_session;
+       return 0;
 }
 
 /*
- * LSM hook implementation that allocs and transfers uctx spec to
- * xfrm_policy.
+ * LSM hook implementation that allocs and transfers uctx spec to xfrm_policy.
  */
 int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
                              struct xfrm_user_sec_ctx *uctx)
 {
-       int err;
-
-       BUG_ON(!uctx);
-
-       err = selinux_xfrm_sec_ctx_alloc(ctxp, uctx, 0);
-       if (err == 0)
-               atomic_inc(&selinux_xfrm_refcount);
-
-       return err;
+       return selinux_xfrm_alloc_user(ctxp, uctx);
 }
 
-
 /*
- * LSM hook implementation that copies security data structure from old to
- * new for policy cloning.
+ * LSM hook implementation that copies security data structure from old to new
+ * for policy cloning.
  */
 int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
                              struct xfrm_sec_ctx **new_ctxp)
 {
        struct xfrm_sec_ctx *new_ctx;
 
-       if (old_ctx) {
-               new_ctx = kmalloc(sizeof(*old_ctx) + old_ctx->ctx_len,
-                                 GFP_ATOMIC);
-               if (!new_ctx)
-                       return -ENOMEM;
+       if (!old_ctx)
+               return 0;
+
+       new_ctx = kmemdup(old_ctx, sizeof(*old_ctx) + old_ctx->ctx_len,
+                         GFP_ATOMIC);
+       if (!new_ctx)
+               return -ENOMEM;
+       atomic_inc(&selinux_xfrm_refcount);
+       *new_ctxp = new_ctx;
 
-               memcpy(new_ctx, old_ctx, sizeof(*new_ctx));
-               memcpy(new_ctx->ctx_str, old_ctx->ctx_str, new_ctx->ctx_len);
-               atomic_inc(&selinux_xfrm_refcount);
-               *new_ctxp = new_ctx;
-       }
        return 0;
 }
 
@@ -327,8 +283,7 @@ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
  */
 void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
 {
-       atomic_dec(&selinux_xfrm_refcount);
-       kfree(ctx);
+       selinux_xfrm_free(ctx);
 }
 
 /*
@@ -336,31 +291,55 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
  */
 int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
 {
-       const struct task_security_struct *tsec = current_security();
-
-       if (!ctx)
-               return 0;
+       return selinux_xfrm_delete(ctx);
+}
 
-       return avc_has_perm(tsec->sid, ctx->ctx_sid,
-                           SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
-                           NULL);
+/*
+ * LSM hook implementation that allocates a xfrm_sec_ctx, populates it using
+ * the supplied security context, and assigns it to the xfrm_state.
+ */
+int selinux_xfrm_state_alloc(struct xfrm_state *x,
+                            struct xfrm_user_sec_ctx *uctx)
+{
+       return selinux_xfrm_alloc_user(&x->security, uctx);
 }
 
 /*
- * LSM hook implementation that allocs and transfers sec_ctx spec to
- * xfrm_state.
+ * LSM hook implementation that allocates a xfrm_sec_ctx and populates it
+ * based on a secid.
  */
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx,
-               u32 secid)
+int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
+                                    struct xfrm_sec_ctx *polsec, u32 secid)
 {
-       int err;
+       int rc;
+       struct xfrm_sec_ctx *ctx;
+       char *ctx_str = NULL;
+       int str_len;
+
+       if (!polsec)
+               return 0;
 
-       BUG_ON(!x);
+       if (secid == 0)
+               return -EINVAL;
 
-       err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, secid);
-       if (err == 0)
-               atomic_inc(&selinux_xfrm_refcount);
-       return err;
+       rc = security_sid_to_context(secid, &ctx_str, &str_len);
+       if (rc)
+               return rc;
+
+       ctx = kmalloc(sizeof(*ctx) + str_len, GFP_ATOMIC);
+       if (!ctx)
+               return -ENOMEM;
+
+       ctx->ctx_doi = XFRM_SC_DOI_LSM;
+       ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+       ctx->ctx_sid = secid;
+       ctx->ctx_len = str_len;
+       memcpy(ctx->ctx_str, ctx_str, str_len);
+       kfree(ctx_str);
+
+       x->security = ctx;
+       atomic_inc(&selinux_xfrm_refcount);
+       return 0;
 }
 
 /*
@@ -368,24 +347,15 @@ int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uct
  */
 void selinux_xfrm_state_free(struct xfrm_state *x)
 {
-       atomic_dec(&selinux_xfrm_refcount);
-       kfree(x->security);
+       selinux_xfrm_free(x->security);
 }
 
- /*
-  * LSM hook implementation that authorizes deletion of labeled SAs.
-  */
+/*
+ * LSM hook implementation that authorizes deletion of labeled SAs.
+ */
 int selinux_xfrm_state_delete(struct xfrm_state *x)
 {
-       const struct task_security_struct *tsec = current_security();
-       struct xfrm_sec_ctx *ctx = x->security;
-
-       if (!ctx)
-               return 0;
-
-       return avc_has_perm(tsec->sid, ctx->ctx_sid,
-                           SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
-                           NULL);
+       return selinux_xfrm_delete(x->security);
 }
 
 /*
@@ -395,14 +365,12 @@ int selinux_xfrm_state_delete(struct xfrm_state *x)
  * we need to check for unlabelled access since this may not have
  * gone thru the IPSec process.
  */
-int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-                               struct common_audit_data *ad)
+int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+                             struct common_audit_data *ad)
 {
-       int i, rc = 0;
-       struct sec_path *sp;
-       u32 sel_sid = SECINITSID_UNLABELED;
-
-       sp = skb->sp;
+       int i;
+       struct sec_path *sp = skb->sp;
+       u32 peer_sid = SECINITSID_UNLABELED;
 
        if (sp) {
                for (i = 0; i < sp->len; i++) {
@@ -410,23 +378,17 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
 
                        if (x && selinux_authorizable_xfrm(x)) {
                                struct xfrm_sec_ctx *ctx = x->security;
-                               sel_sid = ctx->ctx_sid;
+                               peer_sid = ctx->ctx_sid;
                                break;
                        }
                }
        }
 
-       /*
-        * This check even when there's no association involved is
-        * intended, according to Trent Jaeger, to make sure a
-        * process can't engage in non-ipsec communication unless
-        * explicitly allowed by policy.
-        */
-
-       rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__RECVFROM, ad);
-
-       return rc;
+       /* This check even when there's no association involved is intended,
+        * according to Trent Jaeger, to make sure a process can't engage in
+        * non-IPsec communication unless explicitly allowed by policy. */
+       return avc_has_perm(sk_sid, peer_sid,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, ad);
 }
 
 /*
@@ -436,49 +398,38 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
  * If we do have a authorizable security association, then it has already been
  * checked in the selinux_xfrm_state_pol_flow_match hook above.
  */
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                                       struct common_audit_data *ad, u8 proto)
+int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+                               struct common_audit_data *ad, u8 proto)
 {
        struct dst_entry *dst;
-       int rc = 0;
-
-       dst = skb_dst(skb);
-
-       if (dst) {
-               struct dst_entry *dst_test;
-
-               for (dst_test = dst; dst_test != NULL;
-                    dst_test = dst_test->child) {
-                       struct xfrm_state *x = dst_test->xfrm;
-
-                       if (x && selinux_authorizable_xfrm(x))
-                               goto out;
-               }
-       }
 
        switch (proto) {
        case IPPROTO_AH:
        case IPPROTO_ESP:
        case IPPROTO_COMP:
-               /*
-                * We should have already seen this packet once before
-                * it underwent xfrm(s). No need to subject it to the
-                * unlabeled check.
-                */
-               goto out;
+               /* We should have already seen this packet once before it
+                * underwent xfrm(s). No need to subject it to the unlabeled
+                * check. */
+               return 0;
        default:
                break;
        }
 
-       /*
-        * This check even when there's no association involved is
-        * intended, according to Trent Jaeger, to make sure a
-        * process can't engage in non-ipsec communication unless
-        * explicitly allowed by policy.
-        */
+       dst = skb_dst(skb);
+       if (dst) {
+               struct dst_entry *iter;
 
-       rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__SENDTO, ad);
-out:
-       return rc;
+               for (iter = dst; iter != NULL; iter = iter->child) {
+                       struct xfrm_state *x = iter->xfrm;
+
+                       if (x && selinux_authorizable_xfrm(x))
+                               return 0;
+               }
+       }
+
+       /* This check even when there's no association involved is intended,
+        * according to Trent Jaeger, to make sure a process can't engage in
+        * non-IPsec communication unless explicitly allowed by policy. */
+       return avc_has_perm(sk_sid, SECINITSID_UNLABELED,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, ad);
 }
index 076b8e8a51abd50d833bd2bc6a262349ed902f0e..364cc64fce717be1e75d16fa91f4aeaadc55539f 100644 (file)
@@ -177,9 +177,13 @@ struct smk_port_label {
 #define SMACK_CIPSO_MAXCATNUM           184     /* 23 * 8 */
 
 /*
- * Flag for transmute access
+ * Flags for untraditional access modes.
+ * It shouldn't be necessary to avoid conflicts with definitions
+ * in fs.h, but do so anyway.
  */
-#define MAY_TRANSMUTE  64
+#define MAY_TRANSMUTE  0x00001000      /* Controls directory labeling */
+#define MAY_LOCK       0x00002000      /* Locks should be writes, but ... */
+
 /*
  * Just to make the common cases easier to deal with
  */
@@ -188,9 +192,9 @@ struct smk_port_label {
 #define MAY_NOT                0
 
 /*
- * Number of access types used by Smack (rwxat)
+ * Number of access types used by Smack (rwxatl)
  */
-#define SMK_NUM_ACCESS_TYPE 5
+#define SMK_NUM_ACCESS_TYPE 6
 
 /* SMACK data */
 struct smack_audit_data {
index b3b59b1e93d6e6b056789243b77f0b319e56c982..14293cd9b1e53b4a260e9258a5cad54c75d71204 100644 (file)
@@ -84,6 +84,8 @@ int log_policy = SMACK_AUDIT_DENIED;
  *
  * Do the object check first because that is more
  * likely to differ.
+ *
+ * Allowing write access implies allowing locking.
  */
 int smk_access_entry(char *subject_label, char *object_label,
                        struct list_head *rule_list)
@@ -99,6 +101,11 @@ int smk_access_entry(char *subject_label, char *object_label,
                }
        }
 
+       /*
+        * MAY_WRITE implies MAY_LOCK.
+        */
+       if ((may & MAY_WRITE) == MAY_WRITE)
+               may |= MAY_LOCK;
        return may;
 }
 
@@ -245,6 +252,7 @@ out_audit:
 static inline void smack_str_from_perm(char *string, int access)
 {
        int i = 0;
+
        if (access & MAY_READ)
                string[i++] = 'r';
        if (access & MAY_WRITE)
@@ -255,6 +263,8 @@ static inline void smack_str_from_perm(char *string, int access)
                string[i++] = 'a';
        if (access & MAY_TRANSMUTE)
                string[i++] = 't';
+       if (access & MAY_LOCK)
+               string[i++] = 'l';
        string[i] = '\0';
 }
 /**
index 8825375cc031709b3918cd073cd574708c3f0405..b0be893ad44d52bd0f062a1747199330c4d6940d 100644 (file)
@@ -185,7 +185,7 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
        smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
        smk_ad_setfield_u_tsk(&ad, ctp);
 
-       rc = smk_curacc(skp->smk_known, MAY_READWRITE, &ad);
+       rc = smk_curacc(skp->smk_known, mode, &ad);
        return rc;
 }
 
@@ -1146,7 +1146,7 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd,
  * @file: the object
  * @cmd: unused
  *
- * Returns 0 if current has write access, error code otherwise
+ * Returns 0 if current has lock access, error code otherwise
  */
 static int smack_file_lock(struct file *file, unsigned int cmd)
 {
@@ -1154,7 +1154,7 @@ static int smack_file_lock(struct file *file, unsigned int cmd)
 
        smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
        smk_ad_setfield_u_fs_path(&ad, file->f_path);
-       return smk_curacc(file->f_security, MAY_WRITE, &ad);
+       return smk_curacc(file->f_security, MAY_LOCK, &ad);
 }
 
 /**
@@ -1178,8 +1178,13 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
 
        switch (cmd) {
        case F_GETLK:
+               break;
        case F_SETLK:
        case F_SETLKW:
+               smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
+               smk_ad_setfield_u_fs_path(&ad, file->f_path);
+               rc = smk_curacc(file->f_security, MAY_LOCK, &ad);
+               break;
        case F_SETOWN:
        case F_SETSIG:
                smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
index 80f4b4a45725bddba4f094d2fefdd4701c07cb6b..160aa08e3cd5ecd775c819941a056d5c04e1a13e 100644 (file)
@@ -139,7 +139,7 @@ const char *smack_cipso_option = SMACK_CIPSO_OPTION;
  * SMK_LOADLEN: Smack rule length
  */
 #define SMK_OACCESS    "rwxa"
-#define SMK_ACCESS     "rwxat"
+#define SMK_ACCESS     "rwxatl"
 #define SMK_OACCESSLEN (sizeof(SMK_OACCESS) - 1)
 #define SMK_ACCESSLEN  (sizeof(SMK_ACCESS) - 1)
 #define SMK_OLOADLEN   (SMK_LABELLEN + SMK_LABELLEN + SMK_OACCESSLEN)
@@ -282,6 +282,10 @@ static int smk_perm_from_str(const char *string)
                case 'T':
                        perm |= MAY_TRANSMUTE;
                        break;
+               case 'l':
+               case 'L':
+                       perm |= MAY_LOCK;
+                       break;
                default:
                        return perm;
                }
@@ -452,7 +456,7 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf,
                /*
                 * Minor hack for backward compatibility
                 */
-               if (count != SMK_OLOADLEN && count != SMK_LOADLEN)
+               if (count < SMK_OLOADLEN || count > SMK_LOADLEN)
                        return -EINVAL;
        } else {
                if (count >= PAGE_SIZE) {
@@ -592,6 +596,8 @@ static void smk_rule_show(struct seq_file *s, struct smack_rule *srp, int max)
                seq_putc(s, 'a');
        if (srp->smk_access & MAY_TRANSMUTE)
                seq_putc(s, 't');
+       if (srp->smk_access & MAY_LOCK)
+               seq_putc(s, 'l');
 
        seq_putc(s, '\n');
 }
index fa64cd85204fff5a9615f9cabbb5ac519a164d9d..fb5d107f56034eebec7003878eceded711a51769 100644 (file)
@@ -238,7 +238,7 @@ static void davinci_pcm_dma_irq(unsigned link, u16 ch_status, void *data)
        print_buf_info(prtd->ram_channel, "i ram_channel");
        pr_debug("davinci_pcm: link=%d, status=0x%x\n", link, ch_status);
 
-       if (unlikely(ch_status != DMA_COMPLETE))
+       if (unlikely(ch_status != EDMA_DMA_COMPLETE))
                return;
 
        if (snd_pcm_running(substream)) {
index fe702076ca46cc2d3d02bab818446c9d15f8c392..9d77f13c2d2548934293662e314828a817d16144 100644 (file)
@@ -2,7 +2,7 @@
  * turbostat -- show CPU frequency and C-state residency
  * on modern Intel turbo-capable processors.
  *
- * Copyright (c) 2012 Intel Corporation.
+ * Copyright (c) 2013 Intel Corporation.
  * Len Brown <len.brown@intel.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -47,6 +47,8 @@ unsigned int skip_c1;
 unsigned int do_nhm_cstates;
 unsigned int do_snb_cstates;
 unsigned int do_c8_c9_c10;
+unsigned int do_slm_cstates;
+unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
 unsigned int has_epb;
 unsigned int units = 1000000000;       /* Ghz etc */
@@ -81,6 +83,8 @@ double rapl_joule_counter_range;
 #define RAPL_DRAM      (1 << 3)
 #define RAPL_PKG_PERF_STATUS   (1 << 4)
 #define RAPL_DRAM_PERF_STATUS  (1 << 5)
+#define RAPL_PKG_POWER_INFO    (1 << 6)
+#define RAPL_CORE_POLICY       (1 << 7)
 #define        TJMAX_DEFAULT   100
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -96,7 +100,7 @@ struct thread_data {
        unsigned long long tsc;
        unsigned long long aperf;
        unsigned long long mperf;
-       unsigned long long c1;  /* derived */
+       unsigned long long c1;
        unsigned long long extra_msr64;
        unsigned long long extra_delta64;
        unsigned long long extra_msr32;
@@ -266,7 +270,7 @@ void print_header(void)
                outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
        if (do_nhm_cstates)
                outp += sprintf(outp, "    %%c1");
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                outp += sprintf(outp, "    %%c3");
        if (do_nhm_cstates)
                outp += sprintf(outp, "    %%c6");
@@ -280,9 +284,9 @@ void print_header(void)
 
        if (do_snb_cstates)
                outp += sprintf(outp, "   %%pc2");
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                outp += sprintf(outp, "   %%pc3");
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                outp += sprintf(outp, "   %%pc6");
        if (do_snb_cstates)
                outp += sprintf(outp, "   %%pc7");
@@ -480,7 +484,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                goto done;
 
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
        if (do_nhm_cstates)
                outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
@@ -499,9 +503,9 @@ int format_counters(struct thread_data *t, struct core_data *c,
 
        if (do_snb_cstates)
                outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
-       if (do_nhm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates)
                outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
        if (do_snb_cstates)
                outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
@@ -648,17 +652,24 @@ delta_thread(struct thread_data *new, struct thread_data *old,
        }
 
 
-       /*
-        * As counter collection is not atomic,
-        * it is possible for mperf's non-halted cycles + idle states
-        * to exceed TSC's all cycles: show c1 = 0% in that case.
-        */
-       if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
-               old->c1 = 0;
-       else {
-               /* normal case, derive c1 */
-               old->c1 = old->tsc - old->mperf - core_delta->c3
+       if (use_c1_residency_msr) {
+               /*
+                * Some models have a dedicated C1 residency MSR,
+                * which should be more accurate than the derivation below.
+                */
+       } else {
+               /*
+                * As counter collection is not atomic,
+                * it is possible for mperf's non-halted cycles + idle states
+                * to exceed TSC's all cycles: show c1 = 0% in that case.
+                */
+               if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
+                       old->c1 = 0;
+               else {
+                       /* normal case, derive c1 */
+                       old->c1 = old->tsc - old->mperf - core_delta->c3
                                - core_delta->c6 - core_delta->c7;
+               }
        }
 
        if (old->mperf == 0) {
@@ -872,13 +883,21 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
                        return -5;
 
+       if (use_c1_residency_msr) {
+               if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+                       return -6;
+       }
+
        /* collect core counters only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                return 0;
 
-       if (do_nhm_cstates) {
+       if (do_nhm_cstates && !do_slm_cstates) {
                if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
                        return -6;
+       }
+
+       if (do_nhm_cstates) {
                if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
                        return -7;
        }
@@ -898,7 +917,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
                return 0;
 
-       if (do_nhm_cstates) {
+       if (do_nhm_cstates && !do_slm_cstates) {
                if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
                        return -9;
                if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
@@ -977,7 +996,7 @@ void print_verbose_header(void)
                ratio, bclk, ratio * bclk);
 
        get_msr(0, MSR_IA32_POWER_CTL, &msr);
-       fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
+       fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
                msr, msr & 0x2 ? "EN" : "DIS");
 
        if (!do_ivt_turbo_ratio_limit)
@@ -1046,25 +1065,28 @@ print_nhm_turbo_ratio_limits:
 
        switch(msr & 0x7) {
        case 0:
-               fprintf(stderr, "pc0");
+               fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0");
                break;
        case 1:
-               fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
+               fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0");
                break;
        case 2:
-               fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
+               fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3");
                break;
        case 3:
-               fprintf(stderr, "pc6");
+               fprintf(stderr, do_slm_cstates ? "invalid" : "pc6");
                break;
        case 4:
-               fprintf(stderr, "pc7");
+               fprintf(stderr, do_slm_cstates ? "pc4" : "pc7");
                break;
        case 5:
-               fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
+               fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid");
+               break;
+       case 6:
+               fprintf(stderr, do_slm_cstates ? "pc6" : "invalid");
                break;
        case 7:
-               fprintf(stderr, "unlimited");
+               fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited");
                break;
        default:
                fprintf(stderr, "invalid");
@@ -1460,6 +1482,8 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
        case 0x3F:      /* HSW */
        case 0x45:      /* HSW */
        case 0x46:      /* HSW */
+       case 0x37:      /* BYT */
+       case 0x4D:      /* AVN */
                return 1;
        case 0x2E:      /* Nehalem-EX Xeon - Beckton */
        case 0x2F:      /* Westmere-EX Xeon - Eagleton */
@@ -1532,14 +1556,33 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 #define        RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
 #define        RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
 
+/*
+ * get_tdp()
+ *
+ * Returns the TDP, in Watts, used to size the RAPL energy-counter range.
+ *
+ * If do_rapl has RAPL_PKG_POWER_INFO set and MSR_PKG_POWER_INFO can be read
+ * on cpu0, the low RAPL_POWER_GRANULARITY bits of that MSR are scaled by
+ * rapl_power_units (which the caller must have initialized beforehand).
+ * Otherwise a fixed per-model fallback is returned.
+ *
+ * The parameter is declared explicitly as unsigned int: the old-style
+ * "get_tdp(model)" form relied on implicit int, which is not valid C99,
+ * and callers pass an unsigned int model number.
+ */
+double get_tdp(unsigned int model)
+{
+       unsigned long long msr;
+
+       if (do_rapl & RAPL_PKG_POWER_INFO) {
+               /* a non-zero return from get_msr() means the read failed */
+               if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+                       return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+       }
+
+       /* MSR not available or not readable: use a per-model constant */
+       switch (model) {
+       case 0x37:      /* BYT */
+       case 0x4D:      /* AVN */
+               return 30.0;
+       default:
+               return 135.0;
+       }
+}
+
+
 /*
  * rapl_probe()
  *
- * sets do_rapl
+ * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
  */
 void rapl_probe(unsigned int family, unsigned int model)
 {
        unsigned long long msr;
+       unsigned int time_unit;
        double tdp;
 
        if (!genuine_intel)
@@ -1555,11 +1598,15 @@ void rapl_probe(unsigned int family, unsigned int model)
        case 0x3F:      /* HSW */
        case 0x45:      /* HSW */
        case 0x46:      /* HSW */
-               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
+               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
                break;
        case 0x2D:
        case 0x3E:
-               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
+               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+               break;
+       case 0x37:      /* BYT */
+       case 0x4D:      /* AVN */
+               do_rapl = RAPL_PKG | RAPL_CORES ;
                break;
        default:
                return;
@@ -1570,19 +1617,22 @@ void rapl_probe(unsigned int family, unsigned int model)
                return;
 
        rapl_power_units = 1.0 / (1 << (msr & 0xF));
-       rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
-       rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
+       if (model == 0x37)
+               rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
+       else
+               rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
 
-       /* get TDP to determine energy counter range */
-       if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
-               return;
+       time_unit = msr >> 16 & 0xF;
+       if (time_unit == 0)
+               time_unit = 0xA;
 
-       tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+       rapl_time_units = 1.0 / (1 << (time_unit));
 
-       rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
+       tdp = get_tdp(model);
 
+       rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
        if (verbose)
-               fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
+               fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 
        return;
 }
@@ -1668,7 +1718,6 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
        unsigned long long msr;
        int cpu;
-       double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units;
 
        if (!do_rapl)
                return 0;
@@ -1686,23 +1735,13 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
                return -1;
 
-       local_rapl_power_units = 1.0 / (1 << (msr & 0xF));
-       local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
-       local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
-
-       if (local_rapl_power_units != rapl_power_units)
-               fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu);
-       if (local_rapl_energy_units != rapl_energy_units)
-               fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu);
-       if (local_rapl_time_units != rapl_time_units)
-               fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu);
-
        if (verbose) {
                fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
                        "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
-                       local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
+                       rapl_power_units, rapl_energy_units, rapl_time_units);
        }
-       if (do_rapl & RAPL_PKG) {
+       if (do_rapl & RAPL_PKG_POWER_INFO) {
+
                if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
                        return -5;
 
@@ -1714,6 +1753,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
 
+       }
+       if (do_rapl & RAPL_PKG) {
+
                if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
                        return -9;
 
@@ -1749,12 +1791,16 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
                print_power_limit_msr(cpu, msr, "DRAM Limit");
        }
-       if (do_rapl & RAPL_CORES) {
+       if (do_rapl & RAPL_CORE_POLICY) {
                if (verbose) {
                        if (get_msr(cpu, MSR_PP0_POLICY, &msr))
                                return -7;
 
                        fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+               }
+       }
+       if (do_rapl & RAPL_CORES) {
+               if (verbose) {
 
                        if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
                                return -9;
@@ -1813,10 +1859,48 @@ int has_c8_c9_c10(unsigned int family, unsigned int model)
 }
 
 
+/*
+ * is_slm()
+ *
+ * Returns 1 when genuine_intel is set and the model number is
+ * 0x37 (BYT) or 0x4D (AVN); returns 0 otherwise.
+ * 'family' is accepted for signature symmetry but is not examined.
+ */
+int is_slm(unsigned int family, unsigned int model)
+{
+       int model_matches = (model == 0x37 /* BYT */) || (model == 0x4D /* AVN */);
+
+       return genuine_intel && model_matches;
+}
+
+#define SLM_BCLK_FREQS 5
+/* BCLK candidates, indexed by the low 4 bits of MSR_FSB_FREQ */
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
+
+/*
+ * slm_bclk()
+ *
+ * Reads MSR_FSB_FREQ on cpu0 and returns the matching slm_freq_table[]
+ * entry, also reporting it on stderr.
+ *
+ * msr starts out as 3, so index 3 is what gets used if get_msr() fails
+ * without touching it.  An index outside the table is reported and then
+ * replaced by the same default index 3; the index itself must be reset
+ * (not msr), otherwise the table would be read out of bounds.
+ */
+double slm_bclk(void)
+{
+       unsigned long long msr = 3;
+       unsigned int i;
+       double freq;
+
+       if (get_msr(0, MSR_FSB_FREQ, &msr))
+               fprintf(stderr, "SLM BCLK: unknown\n");
+
+       i = msr & 0xf;
+       if (i >= SLM_BCLK_FREQS) {
+               fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
+               i = 3;  /* fall back to the default table index */
+       }
+       freq = slm_freq_table[i];
+
+       fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq);
+
+       return freq;
+}
+
 /*
  * discover_bclk()
  *
  * Returns the BCLK value for this CPU: 100.00 when is_snb() matches,
  * whatever slm_bclk() reports when is_slm() matches, and 133.33 otherwise.
  * NOTE(review): units are presumably MHz, as printed by slm_bclk() -- confirm.
  */
 double discover_bclk(unsigned int family, unsigned int model)
 {
        if (is_snb(family, model))
                return 100.00;
+       else if (is_slm(family, model))
+               return slm_bclk();
        else
                return 133.33;
 }
@@ -1873,7 +1957,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
                fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
                        cpu, msr, target_c_local);
 
-       if (target_c_local < 85 || target_c_local > 120)
+       if (target_c_local < 85 || target_c_local > 127)
                goto guess;
 
        tcc_activation_temp = target_c_local;
@@ -1970,6 +2054,7 @@ void check_cpuid()
        do_smi = do_nhm_cstates;
        do_snb_cstates = is_snb(family, model);
        do_c8_c9_c10 = has_c8_c9_c10(family, model);
+       do_slm_cstates = is_slm(family, model);
        bclk = discover_bclk(family, model);
 
        do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
@@ -2331,7 +2416,7 @@ int main(int argc, char **argv)
        cmdline(argc, argv);
 
        if (verbose)
-               fprintf(stderr, "turbostat v3.4 April 17, 2013"
+               fprintf(stderr, "turbostat v3.5 April 26, 2013"
                        " - Len Brown <lenb@kernel.org>\n");
 
        turbostat_init();
index 662f34c3287e59e8f0707d80b0c88396d402c8c0..a0aa84b5941ac96aabae48b03d80278052ce8929 100644 (file)
@@ -1615,8 +1615,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
 
 /*
  * Writes 'len' bytes at 'offset' of guest page 'gfn' using the zero page
  * as the data source, and returns the result of kvm_write_guest_page().
  * The source pointer is obtained from ZERO_PAGE(0) through
  * __va(page_to_phys()) rather than by naming empty_zero_page directly.
  */
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
 {
-       return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
-                                   offset, len);
+       const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+       return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);