pstore: new filesystem interface to platform persistent storage
author Tony Luck <tony.luck@intel.com>
Tue, 28 Dec 2010 22:25:21 +0000 (14:25 -0800)
committer Tony Luck <tony.luck@intel.com>
Tue, 28 Dec 2010 22:25:21 +0000 (14:25 -0800)
Some platforms have a small amount of non-volatile storage that
can be used to store information useful to diagnose the cause of
a system crash.  This is the generic part of a file system interface
that presents information from the crash as a series of files in
/dev/pstore.  Once the information has been seen, the underlying
storage is freed by deleting the files.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Documentation/ABI/testing/pstore [new file with mode: 0644]
Documentation/ABI/testing/sysfs-fs-pstore [new file with mode: 0644]
fs/Kconfig
fs/Makefile
fs/pstore/Kconfig [new file with mode: 0644]
fs/pstore/Makefile [new file with mode: 0644]
fs/pstore/inode.c [new file with mode: 0644]
fs/pstore/internal.h [new file with mode: 0644]
fs/pstore/platform.c [new file with mode: 0644]
include/linux/magic.h
include/linux/pstore.h [new file with mode: 0644]

diff --git a/Documentation/ABI/testing/pstore b/Documentation/ABI/testing/pstore
new file mode 100644 (file)
index 0000000..f1fb2a0
--- /dev/null
@@ -0,0 +1,35 @@
+Where:         /dev/pstore/...
+Date:          January 2011
+Kernel Version: 2.6.38
+Contact:       tony.luck@intel.com
+Description:   Generic interface to platform dependent persistent storage.
+
+               Platforms that provide a mechanism to preserve some data
+               across system reboots can register with this driver to
+               provide a generic interface to show records captured in
+               the dying moments.  In the case of a panic the last part
+               of the console log is captured, but other interesting
+               data can also be saved.
+
+               # mount -t pstore - /dev/pstore
+
+               $ ls -l /dev/pstore
+               total 0
+               -r--r--r-- 1 root root 7896 Nov 30 15:38 dmesg-erst-1
+
+               Different users of this interface will result in different
+               filename prefixes.  Currently two are defined:
+
+               "dmesg" - saved console log
+               "mce"   - architecture dependent data from fatal h/w error
+
+               Once the information in a file has been read, removing
+               the file will signal to the underlying persistent storage
+               device that it can reclaim the space for later re-use.
+
+               $ rm /dev/pstore/dmesg-erst-1
+
+               The expectation is that all files in /dev/pstore
+               will be saved elsewhere and erased from persistent store
+               soon after boot to free up space ready for the next
+               catastrophe.
diff --git a/Documentation/ABI/testing/sysfs-fs-pstore b/Documentation/ABI/testing/sysfs-fs-pstore
new file mode 100644 (file)
index 0000000..8e659d8
--- /dev/null
@@ -0,0 +1,7 @@
+What:          /sys/fs/pstore/kmsg_bytes
+Date:          January 2011
+Kernel Version: 2.6.38
+Contact:       "Tony Luck" <tony.luck@intel.com>
+Description:
+               Controls amount of console log that will be saved
+               to persistent store on oops/panic.
index 771f457402d4937c4ca29d448ab886b5d4959ae8..2bbe47fec1ec7874e3326eac2b8ce2d790cdc032 100644 (file)
@@ -188,6 +188,7 @@ source "fs/omfs/Kconfig"
 source "fs/hpfs/Kconfig"
 source "fs/qnx4/Kconfig"
 source "fs/romfs/Kconfig"
+source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
index a7f7cef0c0c8343da03006fe39d5cce1a6bd225a..db71a5b21a4f2b4f02ae1e04953f52e70a1a584d 100644 (file)
@@ -121,3 +121,4 @@ obj-$(CONFIG_BTRFS_FS)              += btrfs/
 obj-$(CONFIG_GFS2_FS)           += gfs2/
 obj-$(CONFIG_EXOFS_FS)          += exofs/
 obj-$(CONFIG_CEPH_FS)          += ceph/
+obj-$(CONFIG_PSTORE)           += pstore/
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
new file mode 100644 (file)
index 0000000..867d0ac
--- /dev/null
@@ -0,0 +1,13 @@
+config PSTORE
+       bool "Persistent store support"
+       default n
+       help
+          This option enables generic access to platform level
+          persistent storage via "pstore" filesystem that can
+          be mounted as /dev/pstore.  Only useful if you have
+          a platform level driver that registers with pstore to
+          provide the data, so you probably should just go say "Y"
+          (or "M") to a platform specific persistent store driver
+          (e.g. ACPI_APEI on X86) which will select this for you.
+          If you don't have a platform persistent store driver,
+          say N.
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
new file mode 100644 (file)
index 0000000..760f4bc
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# Makefile for the linux pstorefs routines.
+#
+
+obj-y += pstore.o
+
+pstore-objs += inode.o platform.o
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
new file mode 100644 (file)
index 0000000..0e806aa
--- /dev/null
@@ -0,0 +1,280 @@
+/*
+ * Persistent Storage - ramfs parts.
+ *
+ * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/mount.h>
+#include <linux/ramfs.h>
+#include <linux/sched.h>
+#include <linux/magic.h>
+#include <linux/pstore.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "internal.h"
+
+#define        PSTORE_NAMELEN  64      /* size of the file name buffer in pstore_mkfile() */
+
+struct pstore_private {        /* per-file data, stored in inode->i_private */
+       u64     id;             /* record id, passed to erase() on unlink */
+       int     (*erase)(u64);  /* callback that erases the record from the store */
+};
+
+#define pstore_get_inode ramfs_get_inode       /* inodes come from ramfs */
+
+/*
+ * When a file is unlinked from our file system we call the
+ * platform driver to erase the record from persistent store.
+ *
+ * pstore_mkfile() hashes the dentry before it stores the private
+ * data in inode->i_private, so an unlink that arrives in that window
+ * sees a NULL pointer; in that case there is nothing to erase or free
+ * here.  The return value of the erase callback is not checked: the
+ * file is removed from the file system either way.
+ */
+static int pstore_unlink(struct inode *dir, struct dentry *dentry)
+{
+       struct inode *inode = dentry->d_inode;
+       struct pstore_private *p = inode->i_private;
+
+       if (p) {
+               p->erase(p->id);
+               /* do not leave a dangling pointer behind on the inode */
+               inode->i_private = NULL;
+               kfree(p);
+       }
+
+       return simple_unlink(dir, dentry);
+}
+
+static const struct inode_operations pstore_dir_inode_operations = {   /* root directory ops, installed in pstore_fill_super() */
+       .lookup         = simple_lookup,
+       .unlink         = pstore_unlink,        /* erases the backing record as well */
+};
+
+static const struct super_operations pstore_ops = {    /* superblock ops, installed in pstore_fill_super() */
+       .statfs         = simple_statfs,
+       .drop_inode     = generic_delete_inode,
+       .show_options   = generic_show_options,
+};
+
+static struct super_block *pstore_sb;  /* set in pstore_fill_super(), cleared in pstore_kill_sb() */
+static struct vfsmount *pstore_mnt;    /* set in pstore_get_sb(), cleared in pstore_kill_sb() */
+
+/*
+ * Report whether the pstore file system currently has a mount:
+ * returns 1 while pstore_mnt is set and 0 otherwise.
+ */
+int pstore_is_mounted(void)
+{
+       if (pstore_mnt)
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Set up a file structure as if we had opened this file and
+ * write our data to it.
+ *
+ * "data" is a kernel buffer, so the address limit is switched to
+ * KERNEL_DS around the write and restored afterwards.
+ *
+ * Returns non-zero if all "size" bytes were written, zero otherwise.
+ */
+static int pstore_writefile(struct inode *inode, struct dentry *dentry,
+       char *data, size_t size)
+{
+       struct file f;
+       ssize_t n;
+       mm_segment_t old_fs = get_fs();
+
+       /* zero-fill: every field not set below must be 0/NULL, not '0' */
+       memset(&f, 0, sizeof f);
+       f.f_mapping = inode->i_mapping;
+       f.f_path.dentry = dentry;
+       f.f_path.mnt = pstore_mnt;
+       f.f_pos = 0;
+       f.f_op = inode->i_fop;
+       set_fs(KERNEL_DS);
+       n = do_sync_write(&f, data, size, &f.f_pos);
+       set_fs(old_fs);
+
+       fsnotify_modify(&f);
+
+       /* a negative n is an error code and never counts as a full write */
+       return n >= 0 && (size_t)n == size;
+}
+
+/*
+ * Make a regular file in the root directory of our file system.
+ * Load it up with "size" bytes of data from "data".
+ * Set the mtime & ctime to the date that this record was originally stored.
+ *
+ * type:   record type, selects the file name prefix
+ * psname: name of the platform driver, second component of the file name
+ * id:     record id, last component of the file name; passed to "erase"
+ * time:   original time stamp of the record; ignored if tv_sec is zero
+ * erase:  callback used by pstore_unlink() to erase the record
+ *
+ * Returns 0 on success, -ENOMEM if the inode or the private data cannot
+ * be allocated, -ENOSPC if the dentry cannot be allocated or the data
+ * cannot be written.
+ */
+int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
+                             char *data, size_t size,
+                             struct timespec time, int (*erase)(u64))
+{
+       struct dentry           *root = pstore_sb->s_root;
+       struct dentry           *dentry;
+       struct inode            *inode;
+       int                     rc;
+       char                    name[PSTORE_NAMELEN];
+       struct pstore_private   *private;
+       unsigned long long      recid = id;
+
+       rc = -ENOMEM;
+       inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0);
+       if (!inode)
+               goto fail;
+       inode->i_uid = inode->i_gid = 0;
+       private = kmalloc(sizeof *private, GFP_KERNEL);
+       if (!private)
+               goto fail_alloc;
+       private->id = id;
+       private->erase = erase;
+
+       /*
+        * "psname" comes from the platform driver, so bound the write to
+        * the name buffer; an over-long name is truncated.  The id is
+        * unsigned and is printed as such.
+        */
+       switch (type) {
+       case PSTORE_TYPE_DMESG:
+               snprintf(name, sizeof name, "dmesg-%s-%llu", psname, recid);
+               break;
+       case PSTORE_TYPE_MCE:
+               snprintf(name, sizeof name, "mce-%s-%llu", psname, recid);
+               break;
+       case PSTORE_TYPE_UNKNOWN:
+               snprintf(name, sizeof name, "unknown-%s-%llu", psname, recid);
+               break;
+       default:
+               snprintf(name, sizeof name, "type%d-%s-%llu", type, psname,
+                        recid);
+               break;
+       }
+
+       mutex_lock(&root->d_inode->i_mutex);
+
+       rc = -ENOSPC;
+       /* d_alloc_name() reports failure with NULL, not with an ERR_PTR */
+       dentry = d_alloc_name(root, name);
+       if (!dentry)
+               goto fail_lockedalloc;
+
+       d_add(dentry, inode);
+
+       mutex_unlock(&root->d_inode->i_mutex);
+
+       if (!pstore_writefile(inode, dentry, data, size))
+               goto fail_write;
+
+       inode->i_private = private;
+
+       if (time.tv_sec)
+               inode->i_mtime = inode->i_ctime = time;
+
+       return 0;
+
+fail_write:
+       kfree(private);
+       inode->i_nlink--;
+       mutex_lock(&root->d_inode->i_mutex);
+       d_delete(dentry);
+       dput(dentry);
+       mutex_unlock(&root->d_inode->i_mutex);
+       goto fail;
+
+fail_lockedalloc:
+       mutex_unlock(&root->d_inode->i_mutex);
+       kfree(private);
+fail_alloc:
+       iput(inode);
+
+fail:
+       return rc;
+}
+
+/*
+ * Fill in a freshly allocated superblock: record it in pstore_sb, set
+ * the block size, magic and super operations, create the root
+ * directory and then load the stored records through
+ * pstore_get_records().
+ *
+ * Returns 0 on success, -ENOMEM if the root inode or root dentry
+ * cannot be allocated.
+ */
+int pstore_fill_super(struct super_block *sb, void *data, int silent)
+{
+       struct inode *root_inode;
+       struct dentry *root_dentry;
+
+       save_mount_options(sb, data);
+
+       pstore_sb = sb;
+
+       sb->s_maxbytes          = MAX_LFS_FILESIZE;
+       sb->s_blocksize         = PAGE_CACHE_SIZE;
+       sb->s_blocksize_bits    = PAGE_CACHE_SHIFT;
+       sb->s_magic             = PSTOREFS_MAGIC;
+       sb->s_op                = &pstore_ops;
+       sb->s_time_gran         = 1;
+
+       root_inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0);
+       if (!root_inode)
+               return -ENOMEM;
+
+       /* override ramfs "dir" options so we catch unlink(2) */
+       root_inode->i_op = &pstore_dir_inode_operations;
+
+       root_dentry = d_alloc_root(root_inode);
+       sb->s_root = root_dentry;
+       if (!root_dentry) {
+               /* the inode was not attached to a dentry: drop it here */
+               iput(root_inode);
+               return -ENOMEM;
+       }
+
+       pstore_get_records();
+
+       return 0;
+}
+
+/*
+ * Mount entry point: build the superblock with mount_nodev() and
+ * attach its root to "mnt".  The vfsmount is remembered in pstore_mnt,
+ * which is what pstore_is_mounted() tests.
+ *
+ * Returns 0 on success, or the error reported by mount_nodev().
+ */
+static int pstore_get_sb(struct file_system_type *fs_type,
+       int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+       struct dentry *root;
+
+       root = mount_nodev(fs_type, flags, data, pstore_fill_super);
+       if (IS_ERR(root))
+               return PTR_ERR(root);   /* pass the real cause on, not always -ENOMEM */
+
+       mnt->mnt_root = root;
+       mnt->mnt_sb = root->d_sb;
+       pstore_mnt = mnt;
+
+       return 0;
+}
+
+/*
+ * Tear down the superblock, then forget the cached vfsmount and
+ * superblock pointers so pstore_is_mounted() reports "not mounted".
+ */
+static void pstore_kill_sb(struct super_block *sb)
+{
+       kill_litter_super(sb);
+
+       pstore_mnt = NULL;
+       pstore_sb = NULL;
+}
+
+static struct file_system_type pstore_fs_type = {      /* registered in init_pstore_fs() */
+       .name           = "pstore",             /* as used with "mount -t pstore" */
+       .get_sb         = pstore_get_sb,
+       .kill_sb        = pstore_kill_sb,
+};
+
+/*
+ * Create the "pstore" kobject under fs_kobj with its kmsg_bytes
+ * attribute and register the "pstore" file system type.  Each step
+ * is undone if a later one fails.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init init_pstore_fs(void)
+{
+       int ret;
+       struct kobject *pstorefs_kobj;
+
+       pstorefs_kobj = kobject_create_and_add("pstore", fs_kobj);
+       if (!pstorefs_kobj)
+               return -ENOMEM;
+
+       /* sysfs_create_file() can fail; do not carry on without the attribute */
+       ret = sysfs_create_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr);
+       if (ret)
+               goto out_put;
+
+       ret = register_filesystem(&pstore_fs_type);
+       if (ret)
+               goto out_remove;
+
+       return 0;
+
+out_remove:
+       sysfs_remove_file(pstorefs_kobj, &pstore_kmsg_bytes_attr.attr);
+out_put:
+       kobject_put(pstorefs_kobj);
+       return ret;
+}
+module_init(init_pstore_fs)
+
+MODULE_AUTHOR("Tony Luck <tony.luck@intel.com>");
+MODULE_LICENSE("GPL");
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
new file mode 100644 (file)
index 0000000..76c26d2
--- /dev/null
@@ -0,0 +1,7 @@
+extern void    pstore_get_records(void);       /* platform.c: make a file for every stored record */
+extern int     pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
+                             char *data, size_t size,
+                             struct timespec time, int (*erase)(u64));        /* inode.c: make one file holding one record */
+extern int     pstore_is_mounted(void);        /* inode.c: non-zero while pstore is mounted */
+
+extern struct kobj_attribute pstore_kmsg_bytes_attr;   /* platform.c: the kmsg_bytes sysfs attribute */
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
new file mode 100644 (file)
index 0000000..705fdf8
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ * Persistent Storage - platform driver interface parts.
+ *
+ * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/atomic.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kmsg_dump.h>
+#include <linux/module.h>
+#include <linux/pstore.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "internal.h"
+
+/*
+ * pstore_lock just protects "psinfo" during
+ * calls to pstore_register()
+ */
+static DEFINE_SPINLOCK(pstore_lock);
+static struct pstore_info *psinfo;     /* the registered platform driver, NULL if none */
+
+/* How much of the console log to snapshot. /sys/fs/pstore/kmsg_bytes */
+static unsigned long kmsg_bytes = 10240;       /* in bytes; updated by b_store() */
+
+/*
+ * sysfs "show" handler for kmsg_bytes: formats the current value,
+ * followed by a newline, into "buf" and returns the length.
+ */
+static ssize_t b_show(struct kobject *kobj,
+                     struct kobj_attribute *attr, char *buf)
+{
+       ssize_t len;
+
+       len = snprintf(buf, PAGE_SIZE, "%lu\n", kmsg_bytes);
+
+       return len;
+}
+
+/*
+ * sysfs "store" handler for kmsg_bytes: parse an unsigned decimal
+ * number from "buf" into kmsg_bytes.
+ *
+ * Returns "count" when a number was parsed, -EINVAL otherwise.  A
+ * return of 0 would tell the writer that no bytes were consumed and
+ * invite it to retry the same write forever.
+ */
+static ssize_t b_store(struct kobject *kobj, struct kobj_attribute *attr,
+                      const char *buf, size_t count)
+{
+       if (sscanf(buf, "%lu", &kmsg_bytes) != 1)
+               return -EINVAL;
+
+       return count;
+}
+
+struct kobj_attribute pstore_kmsg_bytes_attr =
+       __ATTR(kmsg_bytes, S_IRUGO | S_IWUSR, b_show, b_store); /* readable by all, writable by owner */
+
+/* Tag each group of saved records with a sequence number */
+static int     oopscount;      /* incremented once per pstore_dump() call */
+
+/*
+ * callback from kmsg_dump. (s2,l2) has the most recently
+ * written bytes, older bytes are in (s1,l1). Save as much
+ * as we can from the end of the buffer.
+ *
+ * The log is saved newest-first in one or more records.  Each
+ * record is built in psinfo->buf as an "Oops#<n> Part<m>" header line
+ * followed by the tail of what is left of (s1,l1)+(s2,l2), and is
+ * handed to the platform write callback.  The loop stops once
+ * kmsg_bytes bytes of log have been saved or nothing is left to copy.
+ *
+ * NOTE(review): this takes psinfo->buf_mutex; confirm that kmsg_dump
+ * never calls dumpers from a context where sleeping is not allowed.
+ */
+static void pstore_dump(struct kmsg_dumper *dumper,
+           enum kmsg_dump_reason reason,
+           const char *s1, unsigned long l1,
+           const char *s2, unsigned long l2)
+{
+       unsigned long   s1_start, s2_start;
+       unsigned long   l1_cpy, l2_cpy;
+       unsigned long   size, total = 0;
+       char            *dst;
+       u64             id;
+       int             hsize, part = 1;
+
+       mutex_lock(&psinfo->buf_mutex);
+       oopscount++;
+       while (total < kmsg_bytes) {
+               dst = psinfo->buf;
+               hsize = sprintf(dst, "Oops#%d Part%d\n", oopscount, part++);
+               size = psinfo->bufsize - hsize; /* room left after the header */
+               dst += hsize;
+
+               l2_cpy = min(l2, size);         /* newest bytes get first claim on the room */
+               l1_cpy = min(l1, size - l2_cpy);        /* older bytes fill what remains */
+
+               if (l1_cpy + l2_cpy == 0)
+                       break;
+
+               s2_start = l2 - l2_cpy;         /* copy from the tail end of each segment */
+               s1_start = l1 - l1_cpy;
+
+               memcpy(dst, s1 + s1_start, l1_cpy);
+               memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
+
+               id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy);
+               if (pstore_is_mounted())        /* also create a file for the new record */
+                       pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id,
+                                     psinfo->buf, hsize + l1_cpy + l2_cpy,
+                                     CURRENT_TIME, psinfo->erase);
+               l1 -= l1_cpy;                   /* drop what was saved; the next part takes older bytes */
+               l2 -= l2_cpy;
+               total += l1_cpy + l2_cpy;
+       }
+       mutex_unlock(&psinfo->buf_mutex);
+}
+
+static struct kmsg_dumper pstore_dumper = {    /* registered with kmsg_dump in pstore_register() */
+       .dump = pstore_dump,
+};
+
+/*
+ * platform specific persistent storage driver registers with
+ * us here. If pstore is already mounted, call the platform
+ * read function right away to populate the file system. If not
+ * then the pstore mount code will call us later to fill out
+ * the file system.
+ *
+ * Register with kmsg_dump to save last part of console log on panic.
+ *
+ * Returns 0 on success, -EBUSY if another driver is already
+ * registered, -EINVAL if a reference on the driver's module cannot
+ * be taken.
+ */
+int pstore_register(struct pstore_info *psi)
+{
+       struct module *owner = psi->owner;
+       int rc;
+
+       spin_lock(&pstore_lock);
+       if (psinfo) {
+               spin_unlock(&pstore_lock);
+               return -EBUSY;
+       }
+       /*
+        * Pin the module before "psinfo" is published, and under the
+        * lock, so a failed registration is never visible to the rest
+        * of pstore and "psinfo" is only written with the lock held.
+        */
+       if (owner && !try_module_get(owner)) {
+               spin_unlock(&pstore_lock);
+               return -EINVAL;
+       }
+       psinfo = psi;
+       spin_unlock(&pstore_lock);
+
+       if (pstore_is_mounted())
+               pstore_get_records();
+
+       /* not fatal: the driver stays registered, but report the failure */
+       rc = kmsg_dump_register(&pstore_dumper);
+       if (rc)
+               printk(KERN_WARNING "pstore: kmsg_dump_register failed (%d)\n",
+                      rc);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pstore_register);
+
+/*
+ * Read all the records from the persistent store. Create and
+ * fill files in our filesystem.
+ *
+ * Does nothing if no platform driver is registered.  Records are read
+ * until the driver's read callback returns 0; records that cannot be
+ * turned into files are counted and reported in a single warning.
+ */
+void pstore_get_records(void)
+{
+       struct pstore_info *psi = psinfo;
+       size_t                  size;
+       u64                     id;
+       enum pstore_type_id     type;
+       struct timespec         time;
+       int                     failed = 0;
+
+       if (!psi)
+               return;
+
+       /* use the snapshot taken above throughout; never re-read "psinfo" */
+       mutex_lock(&psi->buf_mutex);
+       while ((size = psi->read(&id, &type, &time)) > 0) {
+               if (pstore_mkfile(type, psi->name, id, psi->buf, size,
+                                 time, psi->erase))
+                       failed++;
+       }
+       mutex_unlock(&psi->buf_mutex);
+
+       if (failed)
+               printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
+                      failed, psi->name);
+}
+
+/*
+ * Call platform driver to write a record to the
+ * persistent store.
+ *
+ * "size" bytes from "buf" are copied into the driver's buffer and
+ * written as a record of the given "type".  If pstore is mounted, a
+ * file for the new record is created as well, named after that same
+ * type.
+ *
+ * Returns 0 on success, -ENODEV if no platform driver is registered,
+ * -EFBIG if the record does not fit in the driver's buffer.
+ */
+int pstore_write(enum pstore_type_id type, char *buf, size_t size)
+{
+       u64     id;
+
+       if (!psinfo)
+               return -ENODEV;
+
+       if (size > psinfo->bufsize)
+               return -EFBIG;
+
+       mutex_lock(&psinfo->buf_mutex);
+       memcpy(psinfo->buf, buf, size);
+       id = psinfo->write(type, size);
+       /* the file must carry the caller's type, not always "dmesg" */
+       if (pstore_is_mounted())
+               pstore_mkfile(type, psinfo->name, id, psinfo->buf,
+                             size, CURRENT_TIME, psinfo->erase);
+       mutex_unlock(&psinfo->buf_mutex);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pstore_write);
index ff690d05f129c5af1f51b740c975f6a014d55221..e87fd5ac3e5e025fba4ae51b5027d937c3b700b2 100644 (file)
@@ -26,6 +26,7 @@
 #define ISOFS_SUPER_MAGIC      0x9660
 #define JFFS2_SUPER_MAGIC      0x72b6
 #define ANON_INODE_FS_MAGIC    0x09041934
+#define PSTOREFS_MAGIC         0x6165676C      /* pstore fs superblock magic */
 
 #define MINIX_SUPER_MAGIC      0x137F          /* original minix fs */
 #define MINIX_SUPER_MAGIC2     0x138F          /* minix fs, 30 char names */
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
new file mode 100644 (file)
index 0000000..4197773
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Persistent Storage - pstore.h
+ *
+ * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
+ *
+ * This code is the generic layer to export data records from platform
+ * level persistent storage via a file system.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_PSTORE_H
+#define _LINUX_PSTORE_H
+
+/* types */
+enum pstore_type_id {
+       PSTORE_TYPE_DMESG       = 0,    /* saved console log; files are named "dmesg-..." */
+       PSTORE_TYPE_MCE         = 1,    /* data from a fatal h/w error; files are named "mce-..." */
+       PSTORE_TYPE_UNKNOWN     = 255   /* files are named "unknown-..." */
+};
+
+struct pstore_info {
+       struct module   *owner;         /* pstore_register() takes a reference if non-NULL */
+       char            *name;          /* driver name, used as part of the file names */
+       struct mutex    buf_mutex;      /* serialize access to 'buf' */
+       char            *buf;           /* record data is passed through this buffer */
+       size_t          bufsize;        /* size of 'buf' in bytes */
+       size_t          (*read)(u64 *id, enum pstore_type_id *type,
+                       struct timespec *time);        /* fetch the next record; returns its size, 0 when there are no more */
+       u64             (*write)(enum pstore_type_id type, size_t size);       /* store 'size' bytes from 'buf'; returns the record id */
+       int             (*erase)(u64 id);       /* erase the record with this id */
+};
+
+#ifdef CONFIG_PSTORE   /* real implementations are in fs/pstore/platform.c */
+extern int pstore_register(struct pstore_info *);
+extern int pstore_write(enum pstore_type_id type, char *buf, size_t size);
+#else  /* !CONFIG_PSTORE: stubs that always report -ENODEV */
+static inline int
+pstore_register(struct pstore_info *psi)
+{
+       return -ENODEV;
+}
+static inline int
+pstore_write(enum pstore_type_id type, char *buf, size_t size)
+{
+       return -ENODEV;
+}
+#endif /* CONFIG_PSTORE */
+
+#endif /*_LINUX_PSTORE_H*/