1 /*
2  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
3  *
4  * bitmap_create  - sets up the bitmap structure
5  * bitmap_destroy - destroys the bitmap structure
6  *
7  * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8  * - added disk storage for bitmap
9  * - changes to allow various bitmap chunk sizes
10  */
11
12 /*
13  * Still to do:
14  *
15  * flush after percent set rather than just time based. (maybe both).
16  */
17
18 #include <linux/blkdev.h>
19 #include <linux/module.h>
20 #include <linux/errno.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/timer.h>
24 #include <linux/sched.h>
25 #include <linux/list.h>
26 #include <linux/file.h>
27 #include <linux/mount.h>
28 #include <linux/buffer_head.h>
29 #include <linux/seq_file.h>
30 #include "md.h"
31 #include "bitmap.h"
32
33 static inline char *bmname(struct bitmap *bitmap)
34 {
35         return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
36 }
37
38 /*
39  * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
40  *
41  * 1) check to see if this page is allocated; if it's not, try to allocate it
42  * 2) if the allocation fails, set the page's hijacked flag so we'll use the
43  *    page pointer directly as a counter
44  *
45  * if we find our page, we increment the page's refcount so that it stays
46  * allocated while we're using it
47  */
48 static int bitmap_checkpage(struct bitmap_counts *bitmap,
49                             unsigned long page, int create)
50 __releases(bitmap->lock)
51 __acquires(bitmap->lock)
52 {
53         unsigned char *mappage;
54
55         if (page >= bitmap->pages) {
56                 /* This can happen if bitmap_start_sync goes beyond
57                  * the end of the device while looking for a whole page.
58                  * It is harmless.
59                  */
60                 return -EINVAL;
61         }
62
63         if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
64                 return 0;
65
66         if (bitmap->bp[page].map) /* page is already allocated, just return */
67                 return 0;
68
69         if (!create)
70                 return -ENOENT;
71
72         /* this page has not been allocated yet */
73
74         spin_unlock_irq(&bitmap->lock);
75         /* It is possible that this is being called inside a
76          * prepare_to_wait/finish_wait loop from raid5.c:make_request().
77          * In general it is not permitted to sleep in that context as it
78          * can cause the loop to spin freely.
79          * That doesn't apply here as we can only reach this point
80          * once with any loop.
81          * When this function completes, either bp[page].map or
82          * bp[page].hijacked will be set.  In either case, this function will
83          * abort before getting to this point again.  So there is
84          * no risk of a free-spin, and so it is safe to assert
85          * that sleeping here is allowed.
86          */
87         sched_annotate_sleep();
88         mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
89         spin_lock_irq(&bitmap->lock);
90
91         if (mappage == NULL) {
92                 pr_debug("md/bitmap: map page allocation failed, hijacking\n");
93                 /* failed - set the hijacked flag so that we can use the
94                  * pointer as a counter */
95                 if (!bitmap->bp[page].map)
96                         bitmap->bp[page].hijacked = 1;
97         } else if (bitmap->bp[page].map ||
98                    bitmap->bp[page].hijacked) {
99                 /* somebody beat us to getting the page */
100                 kfree(mappage);
101                 return 0;
102         } else {
103
104                 /* no page was in place and we have one, so install it */
105
106                 bitmap->bp[page].map = mappage;
107                 bitmap->missing_pages--;
108         }
109         return 0;
110 }
111
112 /* if page is completely empty, put it back on the free list, or dealloc it */
113 /* if page was hijacked, unmark the flag so it might get alloced next time */
114 /* Note: lock should be held when calling this */
115 static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
116 {
117         char *ptr;
118
119         if (bitmap->bp[page].count) /* page is still busy */
120                 return;
121
122         /* page is no longer in use, it can be released */
123
124         if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
125                 bitmap->bp[page].hijacked = 0;
126                 bitmap->bp[page].map = NULL;
127         } else {
128                 /* normal case, free the page */
129                 ptr = bitmap->bp[page].map;
130                 bitmap->bp[page].map = NULL;
131                 bitmap->missing_pages++;
132                 kfree(ptr);
133         }
134 }
135
136 /*
137  * bitmap file handling - read and write the bitmap file and its superblock
138  */
139
140 /*
141  * basic page I/O operations
142  */
143
144 /* IO operations when bitmap is stored near all superblocks */
145 static int read_sb_page(struct mddev *mddev, loff_t offset,
146                         struct page *page,
147                         unsigned long index, int size)
148 {
149         /* choose a good rdev and read the page from there */
150
151         struct md_rdev *rdev;
152         sector_t target;
153
154         rdev_for_each(rdev, mddev) {
155                 if (! test_bit(In_sync, &rdev->flags)
156                     || test_bit(Faulty, &rdev->flags))
157                         continue;
158
159                 target = offset + index * (PAGE_SIZE/512);
160
161                 if (sync_page_io(rdev, target,
162                                  roundup(size, bdev_logical_block_size(rdev->bdev)),
163                                  page, READ, true)) {
164                         page->index = index;
165                         return 0;
166                 }
167         }
168         return -EIO;
169 }
170
171 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
172 {
173         /* Iterate the disks of an mddev, using rcu to protect access to the
174          * linked list, and raising the refcount of devices we return to ensure
175          * they don't disappear while in use.
176          * As devices are only added or removed when raid_disk is < 0 and
177          * nr_pending is 0 and In_sync is clear, the entries we return will
178          * still be in the same position on the list when we re-enter
179          * list_for_each_entry_continue_rcu.
180          */
181         rcu_read_lock();
182         if (rdev == NULL)
183                 /* start at the beginning */
184                 rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set);
185         else {
186                 /* release the previous rdev and start from there. */
187                 rdev_dec_pending(rdev, mddev);
188         }
189         list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
190                 if (rdev->raid_disk >= 0 &&
191                     !test_bit(Faulty, &rdev->flags)) {
192                         /* this is a usable device */
193                         atomic_inc(&rdev->nr_pending);
194                         rcu_read_unlock();
195                         return rdev;
196                 }
197         }
198         rcu_read_unlock();
199         return NULL;
200 }
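/*
 * Usage sketch (this mirrors the loop in write_sb_page() below): start with
 * rdev == NULL and keep calling until NULL comes back.  Each call drops the
 * nr_pending reference taken on the previously returned device, so a loop
 * that runs to completion leaves no references held.
 */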
201
202 static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
203 {
204         struct md_rdev *rdev = NULL;
205         struct block_device *bdev;
206         struct mddev *mddev = bitmap->mddev;
207         struct bitmap_storage *store = &bitmap->storage;
208
209         while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
210                 int size = PAGE_SIZE;
211                 loff_t offset = mddev->bitmap_info.offset;
212
213                 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
214
215                 if (page->index == store->file_pages-1) {
216                         int last_page_size = store->bytes & (PAGE_SIZE-1);
217                         if (last_page_size == 0)
218                                 last_page_size = PAGE_SIZE;
219                         size = roundup(last_page_size,
220                                        bdev_logical_block_size(bdev));
221                 }
222                 /* Just make sure we aren't corrupting data or
223                  * metadata
224                  */
225                 if (mddev->external) {
226                         /* Bitmap could be anywhere. */
227                         if (rdev->sb_start + offset + (page->index
228                                                        * (PAGE_SIZE/512))
229                             > rdev->data_offset
230                             &&
231                             rdev->sb_start + offset
232                             < (rdev->data_offset + mddev->dev_sectors
233                              + (PAGE_SIZE/512)))
234                                 goto bad_alignment;
235                 } else if (offset < 0) {
236                         /* DATA  BITMAP METADATA  */
237                         if (offset
238                             + (long)(page->index * (PAGE_SIZE/512))
239                             + size/512 > 0)
240                                 /* bitmap runs into metadata */
241                                 goto bad_alignment;
242                         if (rdev->data_offset + mddev->dev_sectors
243                             > rdev->sb_start + offset)
244                                 /* data runs into bitmap */
245                                 goto bad_alignment;
246                 } else if (rdev->sb_start < rdev->data_offset) {
247                         /* METADATA BITMAP DATA */
248                         if (rdev->sb_start
249                             + offset
250                             + page->index*(PAGE_SIZE/512) + size/512
251                             > rdev->data_offset)
252                                 /* bitmap runs into data */
253                                 goto bad_alignment;
254                 } else {
255                         /* DATA METADATA BITMAP - no problems */
256                 }
257                 md_super_write(mddev, rdev,
258                                rdev->sb_start + offset
259                                + page->index * (PAGE_SIZE/512),
260                                size,
261                                page);
262         }
263
264         if (wait)
265                 md_super_wait(mddev);
266         return 0;
267
268  bad_alignment:
269         return -EINVAL;
270 }
271
272 static void bitmap_file_kick(struct bitmap *bitmap);
273 /*
274  * write out a page to a file
275  */
276 static void write_page(struct bitmap *bitmap, struct page *page, int wait)
277 {
278         struct buffer_head *bh;
279
280         if (bitmap->storage.file == NULL) {
281                 switch (write_sb_page(bitmap, page, wait)) {
282                 case -EINVAL:
283                         set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
284                 }
285         } else {
286
287                 bh = page_buffers(page);
288
289                 while (bh && bh->b_blocknr) {
290                         atomic_inc(&bitmap->pending_writes);
291                         set_buffer_locked(bh);
292                         set_buffer_mapped(bh);
293                         submit_bh(WRITE | REQ_SYNC, bh);
294                         bh = bh->b_this_page;
295                 }
296
297                 if (wait)
298                         wait_event(bitmap->write_wait,
299                                    atomic_read(&bitmap->pending_writes)==0);
300         }
301         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
302                 bitmap_file_kick(bitmap);
303 }
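/*
 * Summary sketch of the two paths above (descriptive only, no new
 * behaviour): an internal bitmap (storage.file == NULL) goes through
 * write_sb_page(), which writes the page to every active member device via
 * md_super_write(); an external bitmap file instead re-submits the
 * buffer_heads that read_page() attached, writing straight to the block
 * device backing the file.  A bad-alignment failure on the internal path,
 * or an I/O error reported through end_bitmap_write(), sets
 * BITMAP_WRITE_ERROR and bitmap_file_kick() retires the bitmap.
 */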
304
305 static void end_bitmap_write(struct buffer_head *bh, int uptodate)
306 {
307         struct bitmap *bitmap = bh->b_private;
308
309         if (!uptodate)
310                 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
311         if (atomic_dec_and_test(&bitmap->pending_writes))
312                 wake_up(&bitmap->write_wait);
313 }
314
315 /* copied from buffer.c */
316 static void
317 __clear_page_buffers(struct page *page)
318 {
319         ClearPagePrivate(page);
320         set_page_private(page, 0);
321         page_cache_release(page);
322 }
323 static void free_buffers(struct page *page)
324 {
325         struct buffer_head *bh;
326
327         if (!PagePrivate(page))
328                 return;
329
330         bh = page_buffers(page);
331         while (bh) {
332                 struct buffer_head *next = bh->b_this_page;
333                 free_buffer_head(bh);
334                 bh = next;
335         }
336         __clear_page_buffers(page);
337         put_page(page);
338 }
339
340 /* read a page from a file.
341  * We both read the page, and attach buffers to the page to record the
342  * address of each block (using bmap).  These addresses will be used
343  * to write the block later, completely bypassing the filesystem.
344  * This usage is similar to how swap files are handled, and allows us
345  * to write to a file with no concerns of memory allocation failing.
346  */
347 static int read_page(struct file *file, unsigned long index,
348                      struct bitmap *bitmap,
349                      unsigned long count,
350                      struct page *page)
351 {
352         int ret = 0;
353         struct inode *inode = file_inode(file);
354         struct buffer_head *bh;
355         sector_t block;
356
357         pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
358                  (unsigned long long)index << PAGE_SHIFT);
359
360         bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
361         if (!bh) {
362                 ret = -ENOMEM;
363                 goto out;
364         }
365         attach_page_buffers(page, bh);
366         block = index << (PAGE_SHIFT - inode->i_blkbits);
367         while (bh) {
368                 if (count == 0)
369                         bh->b_blocknr = 0;
370                 else {
371                         bh->b_blocknr = bmap(inode, block);
372                         if (bh->b_blocknr == 0) {
373                                 /* Cannot use this file! */
374                                 ret = -EINVAL;
375                                 goto out;
376                         }
377                         bh->b_bdev = inode->i_sb->s_bdev;
378                         if (count < (1<<inode->i_blkbits))
379                                 count = 0;
380                         else
381                                 count -= (1<<inode->i_blkbits);
382
383                         bh->b_end_io = end_bitmap_write;
384                         bh->b_private = bitmap;
385                         atomic_inc(&bitmap->pending_writes);
386                         set_buffer_locked(bh);
387                         set_buffer_mapped(bh);
388                         submit_bh(READ, bh);
389                 }
390                 block++;
391                 bh = bh->b_this_page;
392         }
393         page->index = index;
394
395         wait_event(bitmap->write_wait,
396                    atomic_read(&bitmap->pending_writes)==0);
397         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
398                 ret = -EIO;
399 out:
400         if (ret)
401                 printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
402                         (int)PAGE_SIZE,
403                         (unsigned long long)index << PAGE_SHIFT,
404                         ret);
405         return ret;
406 }
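/*
 * Illustration of the block arithmetic above (example numbers only,
 * assuming 4 KiB pages, PAGE_SHIFT == 12, and a 1 KiB filesystem block
 * size, i_blkbits == 10): page index 3 starts at file block
 * 3 << (12 - 10) == 12, so the four buffer_heads attached to that page get
 * b_blocknr values bmap(inode, 12) .. bmap(inode, 15).  Those physical
 * block numbers are what write_page() later reuses to submit I/O directly,
 * bypassing the filesystem in the same way swap files are handled.
 */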
407
408 /*
409  * bitmap file superblock operations
410  */
411
412 /* update the event counter and sync the superblock to disk */
413 void bitmap_update_sb(struct bitmap *bitmap)
414 {
415         bitmap_super_t *sb;
416
417         if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
418                 return;
419         if (bitmap->mddev->bitmap_info.external)
420                 return;
421         if (!bitmap->storage.sb_page) /* no superblock */
422                 return;
423         sb = kmap_atomic(bitmap->storage.sb_page);
424         sb->events = cpu_to_le64(bitmap->mddev->events);
425         if (bitmap->mddev->events < bitmap->events_cleared)
426                 /* rocking back to read-only */
427                 bitmap->events_cleared = bitmap->mddev->events;
428         sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
429         sb->state = cpu_to_le32(bitmap->flags);
430         /* Just in case these have been changed via sysfs: */
431         sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
432         sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
433         /* This might have been changed by a reshape */
434         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
435         sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
436         sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
437                                            bitmap_info.space);
438         kunmap_atomic(sb);
439         write_page(bitmap, bitmap->storage.sb_page, 1);
440 }
441
442 /* print out the bitmap file superblock */
443 void bitmap_print_sb(struct bitmap *bitmap)
444 {
445         bitmap_super_t *sb;
446
447         if (!bitmap || !bitmap->storage.sb_page)
448                 return;
449         sb = kmap_atomic(bitmap->storage.sb_page);
450         printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
451         printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
452         printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
453         printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
454                                         *(__u32 *)(sb->uuid+0),
455                                         *(__u32 *)(sb->uuid+4),
456                                         *(__u32 *)(sb->uuid+8),
457                                         *(__u32 *)(sb->uuid+12));
458         printk(KERN_DEBUG "        events: %llu\n",
459                         (unsigned long long) le64_to_cpu(sb->events));
460         printk(KERN_DEBUG "events cleared: %llu\n",
461                         (unsigned long long) le64_to_cpu(sb->events_cleared));
462         printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
463         printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
464         printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
465         printk(KERN_DEBUG "     sync size: %llu KB\n",
466                         (unsigned long long)le64_to_cpu(sb->sync_size)/2);
467         printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
468         kunmap_atomic(sb);
469 }
470
471 /*
472  * bitmap_new_disk_sb
473  * @bitmap
474  *
475  * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
476  * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
477  * This function verifies 'bitmap_info' and populates the on-disk bitmap
478  * structure, which is to be written to disk.
479  *
480  * Returns: 0 on success, -Exxx on error
481  */
482 static int bitmap_new_disk_sb(struct bitmap *bitmap)
483 {
484         bitmap_super_t *sb;
485         unsigned long chunksize, daemon_sleep, write_behind;
486
487         bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
488         if (bitmap->storage.sb_page == NULL)
489                 return -ENOMEM;
490         bitmap->storage.sb_page->index = 0;
491
492         sb = kmap_atomic(bitmap->storage.sb_page);
493
494         sb->magic = cpu_to_le32(BITMAP_MAGIC);
495         sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
496
497         chunksize = bitmap->mddev->bitmap_info.chunksize;
498         BUG_ON(!chunksize);
499         if (!is_power_of_2(chunksize)) {
500                 kunmap_atomic(sb);
501                 printk(KERN_ERR "bitmap chunksize not a power of 2\n");
502                 return -EINVAL;
503         }
504         sb->chunksize = cpu_to_le32(chunksize);
505
506         daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
507         if (!daemon_sleep ||
508             (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
509                 printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
510                 daemon_sleep = 5 * HZ;
511         }
512         sb->daemon_sleep = cpu_to_le32(daemon_sleep);
513         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
514
515         /*
516          * FIXME: write_behind for RAID1.  If not specified, what
517          * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
518          */
519         write_behind = bitmap->mddev->bitmap_info.max_write_behind;
520         if (write_behind > COUNTER_MAX)
521                 write_behind = COUNTER_MAX / 2;
522         sb->write_behind = cpu_to_le32(write_behind);
523         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
524
525         /* keep the array size field of the bitmap superblock up to date */
526         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
527
528         memcpy(sb->uuid, bitmap->mddev->uuid, 16);
529
530         set_bit(BITMAP_STALE, &bitmap->flags);
531         sb->state = cpu_to_le32(bitmap->flags);
532         bitmap->events_cleared = bitmap->mddev->events;
533         sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
534
535         kunmap_atomic(sb);
536
537         return 0;
538 }
539
540 /* read the superblock from the bitmap file and initialize some bitmap fields */
541 static int bitmap_read_sb(struct bitmap *bitmap)
542 {
543         char *reason = NULL;
544         bitmap_super_t *sb;
545         unsigned long chunksize, daemon_sleep, write_behind;
546         unsigned long long events;
547         unsigned long sectors_reserved = 0;
548         int err = -EINVAL;
549         struct page *sb_page;
550
551         if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
552                 chunksize = 128 * 1024 * 1024;
553                 daemon_sleep = 5 * HZ;
554                 write_behind = 0;
555                 set_bit(BITMAP_STALE, &bitmap->flags);
556                 err = 0;
557                 goto out_no_sb;
558         }
559         /* page 0 is the superblock, read it... */
560         sb_page = alloc_page(GFP_KERNEL);
561         if (!sb_page)
562                 return -ENOMEM;
563         bitmap->storage.sb_page = sb_page;
564
565         if (bitmap->storage.file) {
566                 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
567                 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
568
569                 err = read_page(bitmap->storage.file, 0,
570                                 bitmap, bytes, sb_page);
571         } else {
572                 err = read_sb_page(bitmap->mddev,
573                                    bitmap->mddev->bitmap_info.offset,
574                                    sb_page,
575                                    0, sizeof(bitmap_super_t));
576         }
577         if (err)
578                 return err;
579
580         sb = kmap_atomic(sb_page);
581
582         chunksize = le32_to_cpu(sb->chunksize);
583         daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
584         write_behind = le32_to_cpu(sb->write_behind);
585         sectors_reserved = le32_to_cpu(sb->sectors_reserved);
586
587         /* verify that the bitmap-specific fields are valid */
588         if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
589                 reason = "bad magic";
590         else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
591                  le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
592                 reason = "unrecognized superblock version";
593         else if (chunksize < 512)
594                 reason = "bitmap chunksize too small";
595         else if (!is_power_of_2(chunksize))
596                 reason = "bitmap chunksize not a power of 2";
597         else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
598                 reason = "daemon sleep period out of range";
599         else if (write_behind > COUNTER_MAX)
600                 reason = "write-behind limit out of range (0 - 16383)";
601         if (reason) {
602                 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
603                         bmname(bitmap), reason);
604                 goto out;
605         }
606
607         /* keep the array size field of the bitmap superblock up to date */
608         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
609
610         if (bitmap->mddev->persistent) {
611                 /*
612                  * We have a persistent array superblock, so compare the
613                  * bitmap's UUID and event counter to the mddev's
614                  */
615                 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
616                         printk(KERN_INFO
617                                "%s: bitmap superblock UUID mismatch\n",
618                                bmname(bitmap));
619                         goto out;
620                 }
621                 events = le64_to_cpu(sb->events);
622                 if (events < bitmap->mddev->events) {
623                         printk(KERN_INFO
624                                "%s: bitmap file is out of date (%llu < %llu) "
625                                "-- forcing full recovery\n",
626                                bmname(bitmap), events,
627                                (unsigned long long) bitmap->mddev->events);
628                         set_bit(BITMAP_STALE, &bitmap->flags);
629                 }
630         }
631
632         /* assign fields using values from superblock */
633         bitmap->flags |= le32_to_cpu(sb->state);
634         if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
635                 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
636         bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
637         err = 0;
638 out:
639         kunmap_atomic(sb);
640 out_no_sb:
641         if (test_bit(BITMAP_STALE, &bitmap->flags))
642                 bitmap->events_cleared = bitmap->mddev->events;
643         bitmap->mddev->bitmap_info.chunksize = chunksize;
644         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
645         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
646         if (bitmap->mddev->bitmap_info.space == 0 ||
647             bitmap->mddev->bitmap_info.space > sectors_reserved)
648                 bitmap->mddev->bitmap_info.space = sectors_reserved;
649         if (err)
650                 bitmap_print_sb(bitmap);
651         return err;
652 }
653
654 /*
655  * general bitmap file operations
656  */
657
658 /*
659  * on-disk bitmap:
660  *
661  * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
662  * file a page at a time. There's a superblock at the start of the file.
663  */
664 /* calculate the index of the page that contains this bit */
665 static inline unsigned long file_page_index(struct bitmap_storage *store,
666                                             unsigned long chunk)
667 {
668         if (store->sb_page)
669                 chunk += sizeof(bitmap_super_t) << 3;
670         return chunk >> PAGE_BIT_SHIFT;
671 }
672
673 /* calculate the (bit) offset of this bit within a page */
674 static inline unsigned long file_page_offset(struct bitmap_storage *store,
675                                              unsigned long chunk)
676 {
677         if (store->sb_page)
678                 chunk += sizeof(bitmap_super_t) << 3;
679         return chunk & (PAGE_BITS - 1);
680 }
681
682 /*
683  * return a pointer to the page in the filemap that contains the given bit
684  *
685  */
686 static inline struct page *filemap_get_page(struct bitmap_storage *store,
687                                             unsigned long chunk)
688 {
689         if (file_page_index(store, chunk) >= store->file_pages)
690                 return NULL;
691         return store->filemap[file_page_index(store, chunk)];
692 }
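/*
 * Worked example for the three helpers above (illustrative numbers,
 * assuming 4 KiB pages, i.e. PAGE_BITS == 32768, and the 256-byte
 * bitmap_super_t from bitmap.h):
 *
 *   with an in-file superblock, chunk 40000 maps to absolute bit
 *   40000 + 256*8 = 42048, so
 *     file_page_index()  -> 42048 >> 15   == 1     (second filemap page)
 *     file_page_offset() -> 42048 & 32767 == 9280  (bit within that page)
 *
 *   filemap_get_page() then returns store->filemap[1], or NULL if the
 *   computed index is beyond store->file_pages.
 */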
693
694 static int bitmap_storage_alloc(struct bitmap_storage *store,
695                                 unsigned long chunks, int with_super)
696 {
697         int pnum;
698         unsigned long num_pages;
699         unsigned long bytes;
700
701         bytes = DIV_ROUND_UP(chunks, 8);
702         if (with_super)
703                 bytes += sizeof(bitmap_super_t);
704
705         num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
706
707         store->filemap = kmalloc(sizeof(struct page *)
708                                  * num_pages, GFP_KERNEL);
709         if (!store->filemap)
710                 return -ENOMEM;
711
712         if (with_super && !store->sb_page) {
713                 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
714                 if (store->sb_page == NULL)
715                         return -ENOMEM;
716                 store->sb_page->index = 0;
717         }
718         pnum = 0;
719         if (store->sb_page) {
720                 store->filemap[0] = store->sb_page;
721                 pnum = 1;
722         }
723         for ( ; pnum < num_pages; pnum++) {
724                 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
725                 if (!store->filemap[pnum]) {
726                         store->file_pages = pnum;
727                         return -ENOMEM;
728                 }
729                 store->filemap[pnum]->index = pnum;
730         }
731         store->file_pages = pnum;
732
733         /* We need 4 bits per page, rounded up to a multiple
734          * of sizeof(unsigned long) */
735         store->filemap_attr = kzalloc(
736                 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
737                 GFP_KERNEL);
738         if (!store->filemap_attr)
739                 return -ENOMEM;
740
741         store->bytes = bytes;
742
743         return 0;
744 }
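/*
 * Sizing sketch for bitmap_storage_alloc() (illustrative numbers only,
 * assuming 4 KiB pages and a 64-bit unsigned long):
 *
 *   chunks = 100000, with_super = 1
 *     bytes        = DIV_ROUND_UP(100000, 8) + 256   = 12756
 *     num_pages    = DIV_ROUND_UP(12756, 4096)       = 4
 *     filemap      = 4 page pointers, filemap[0] being sb_page
 *     filemap_attr = roundup(DIV_ROUND_UP(4 * 4, 8), 8) = 8 bytes,
 *                    i.e. four attribute bits per filemap page
 */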
745
746 static void bitmap_file_unmap(struct bitmap_storage *store)
747 {
748         struct page **map, *sb_page;
749         int pages;
750         struct file *file;
751
752         file = store->file;
753         map = store->filemap;
754         pages = store->file_pages;
755         sb_page = store->sb_page;
756
757         while (pages--)
758                 if (map[pages] != sb_page) /* 0 is sb_page, release it below */
759                         free_buffers(map[pages]);
760         kfree(map);
761         kfree(store->filemap_attr);
762
763         if (sb_page)
764                 free_buffers(sb_page);
765
766         if (file) {
767                 struct inode *inode = file_inode(file);
768                 invalidate_mapping_pages(inode->i_mapping, 0, -1);
769                 fput(file);
770         }
771 }
772
773 /*
774  * bitmap_file_kick - if an error occurs while manipulating the bitmap file
775  * then it is no longer reliable, so we stop using it and we mark the file
776  * as failed in the superblock
777  */
778 static void bitmap_file_kick(struct bitmap *bitmap)
779 {
780         char *path, *ptr = NULL;
781
782         if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
783                 bitmap_update_sb(bitmap);
784
785                 if (bitmap->storage.file) {
786                         path = kmalloc(PAGE_SIZE, GFP_KERNEL);
787                         if (path)
788                                 ptr = d_path(&bitmap->storage.file->f_path,
789                                              path, PAGE_SIZE);
790
791                         printk(KERN_ALERT
792                               "%s: kicking failed bitmap file %s from array!\n",
793                               bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
794
795                         kfree(path);
796                 } else
797                         printk(KERN_ALERT
798                                "%s: disabling internal bitmap due to errors\n",
799                                bmname(bitmap));
800         }
801 }
802
803 enum bitmap_page_attr {
804         BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
805         BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
806                                     * i.e. counter is 1 or 2. */
807         BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
808 };
809
810 static inline void set_page_attr(struct bitmap *bitmap, int pnum,
811                                  enum bitmap_page_attr attr)
812 {
813         set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
814 }
815
816 static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
817                                    enum bitmap_page_attr attr)
818 {
819         clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
820 }
821
822 static inline int test_page_attr(struct bitmap *bitmap, int pnum,
823                                  enum bitmap_page_attr attr)
824 {
825         return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
826 }
827
828 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
829                                            enum bitmap_page_attr attr)
830 {
831         return test_and_clear_bit((pnum<<2) + attr,
832                                   bitmap->storage.filemap_attr);
833 }
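/*
 * Attribute-bit layout used by the four helpers above (a sketch; the enum
 * values are the ones defined earlier in this file): each filemap page owns
 * four consecutive bits of filemap_attr, addressed as (pnum << 2) + attr.
 * For page 3, for example:
 *
 *   bit 12 -> BITMAP_PAGE_DIRTY
 *   bit 13 -> BITMAP_PAGE_PENDING
 *   bit 14 -> BITMAP_PAGE_NEEDWRITE
 *   bit 15 -> unused
 *
 * so set_page_attr(bitmap, 3, BITMAP_PAGE_NEEDWRITE) is simply
 * set_bit(14, bitmap->storage.filemap_attr).
 */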
834 /*
835  * bitmap_file_set_bit -- called before performing a write to the md device
836  * to set (and eventually sync) a particular bit in the bitmap file
837  *
838  * we set the bit immediately, then we record the page number so that
839  * when an unplug occurs, we can flush the dirty pages out to disk
840  */
841 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
842 {
843         unsigned long bit;
844         struct page *page;
845         void *kaddr;
846         unsigned long chunk = block >> bitmap->counts.chunkshift;
847
848         page = filemap_get_page(&bitmap->storage, chunk);
849         if (!page)
850                 return;
851         bit = file_page_offset(&bitmap->storage, chunk);
852
853         /* set the bit */
854         kaddr = kmap_atomic(page);
855         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
856                 set_bit(bit, kaddr);
857         else
858                 set_bit_le(bit, kaddr);
859         kunmap_atomic(kaddr);
860         pr_debug("set file bit %lu page %lu\n", bit, page->index);
861         /* record page number so it gets flushed to disk when unplug occurs */
862         set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
863 }
864
865 static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
866 {
867         unsigned long bit;
868         struct page *page;
869         void *paddr;
870         unsigned long chunk = block >> bitmap->counts.chunkshift;
871
872         page = filemap_get_page(&bitmap->storage, chunk);
873         if (!page)
874                 return;
875         bit = file_page_offset(&bitmap->storage, chunk);
876         paddr = kmap_atomic(page);
877         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
878                 clear_bit(bit, paddr);
879         else
880                 clear_bit_le(bit, paddr);
881         kunmap_atomic(paddr);
882         if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
883                 set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
884                 bitmap->allclean = 0;
885         }
886 }
887
888 /* this gets called when the md device is ready to unplug its underlying
889  * (slave) device queues -- before we let any writes go down, we need to
890  * sync the dirty pages of the bitmap file to disk */
891 void bitmap_unplug(struct bitmap *bitmap)
892 {
893         unsigned long i;
894         int dirty, need_write;
895
896         if (!bitmap || !bitmap->storage.filemap ||
897             test_bit(BITMAP_STALE, &bitmap->flags))
898                 return;
899
900         /* look at each page to see if there are any set bits that need to be
901          * flushed out to disk */
902         for (i = 0; i < bitmap->storage.file_pages; i++) {
903                 if (!bitmap->storage.filemap)
904                         return;
905                 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
906                 need_write = test_and_clear_page_attr(bitmap, i,
907                                                       BITMAP_PAGE_NEEDWRITE);
908                 if (dirty || need_write) {
909                         clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
910                         write_page(bitmap, bitmap->storage.filemap[i], 0);
911                 }
912         }
913         if (bitmap->storage.file)
914                 wait_event(bitmap->write_wait,
915                            atomic_read(&bitmap->pending_writes)==0);
916         else
917                 md_super_wait(bitmap->mddev);
918
919         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
920                 bitmap_file_kick(bitmap);
921 }
922 EXPORT_SYMBOL(bitmap_unplug);
923
924 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
925 /* bitmap_init_from_disk -- called at bitmap_create time to initialize
926  * the in-memory bitmap from the on-disk bitmap -- also, sets up the
927  * memory mapping of the bitmap file
928  * Special cases:
929  *   if there's no bitmap file, or if the bitmap file had been
930  *   previously kicked from the array, we mark all the bits as
931  *   1's in order to cause a full resync.
932  *
933  * We ignore all bits for sectors that end earlier than 'start'.
934  * This is used when reading an out-of-date bitmap...
935  */
936 static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
937 {
938         unsigned long i, chunks, index, oldindex, bit;
939         struct page *page = NULL;
940         unsigned long bit_cnt = 0;
941         struct file *file;
942         unsigned long offset;
943         int outofdate;
944         int ret = -ENOSPC;
945         void *paddr;
946         struct bitmap_storage *store = &bitmap->storage;
947
948         chunks = bitmap->counts.chunks;
949         file = store->file;
950
951         if (!file && !bitmap->mddev->bitmap_info.offset) {
952                 /* No permanent bitmap - fill with '1s'. */
953                 store->filemap = NULL;
954                 store->file_pages = 0;
955                 for (i = 0; i < chunks ; i++) {
956                         /* if the disk bit is set, set the memory bit */
957                         int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
958                                       >= start);
959                         bitmap_set_memory_bits(bitmap,
960                                                (sector_t)i << bitmap->counts.chunkshift,
961                                                needed);
962                 }
963                 return 0;
964         }
965
966         outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
967         if (outofdate)
968                 printk(KERN_INFO "%s: bitmap file is out of date, doing full "
969                         "recovery\n", bmname(bitmap));
970
971         if (file && i_size_read(file->f_mapping->host) < store->bytes) {
972                 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
973                        bmname(bitmap),
974                        (unsigned long) i_size_read(file->f_mapping->host),
975                        store->bytes);
976                 goto err;
977         }
978
979         oldindex = ~0L;
980         offset = 0;
981         if (!bitmap->mddev->bitmap_info.external)
982                 offset = sizeof(bitmap_super_t);
983
984         for (i = 0; i < chunks; i++) {
985                 int b;
986                 index = file_page_index(&bitmap->storage, i);
987                 bit = file_page_offset(&bitmap->storage, i);
988                 if (index != oldindex) { /* this is a new page, read it in */
989                         int count;
990                         /* work out how much of this (possibly partial) page to read */
991                         if (index == store->file_pages-1)
992                                 count = store->bytes - index * PAGE_SIZE;
993                         else
994                                 count = PAGE_SIZE;
995                         page = store->filemap[index];
996                         if (file)
997                                 ret = read_page(file, index, bitmap,
998                                                 count, page);
999                         else
1000                                 ret = read_sb_page(
1001                                         bitmap->mddev,
1002                                         bitmap->mddev->bitmap_info.offset,
1003                                         page,
1004                                         index, count);
1005
1006                         if (ret)
1007                                 goto err;
1008
1009                         oldindex = index;
1010
1011                         if (outofdate) {
1012                                 /*
1013                                  * if bitmap is out of date, dirty the
1014                                  * whole page and write it out
1015                                  */
1016                                 paddr = kmap_atomic(page);
1017                                 memset(paddr + offset, 0xff,
1018                                        PAGE_SIZE - offset);
1019                                 kunmap_atomic(paddr);
1020                                 write_page(bitmap, page, 1);
1021
1022                                 ret = -EIO;
1023                                 if (test_bit(BITMAP_WRITE_ERROR,
1024                                              &bitmap->flags))
1025                                         goto err;
1026                         }
1027                 }
1028                 paddr = kmap_atomic(page);
1029                 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1030                         b = test_bit(bit, paddr);
1031                 else
1032                         b = test_bit_le(bit, paddr);
1033                 kunmap_atomic(paddr);
1034                 if (b) {
1035                         /* if the disk bit is set, set the memory bit */
1036                         int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
1037                                       >= start);
1038                         bitmap_set_memory_bits(bitmap,
1039                                                (sector_t)i << bitmap->counts.chunkshift,
1040                                                needed);
1041                         bit_cnt++;
1042                 }
1043                 offset = 0;
1044         }
1045
1046         printk(KERN_INFO "%s: bitmap initialized from disk: "
1047                "read %lu pages, set %lu of %lu bits\n",
1048                bmname(bitmap), store->file_pages,
1049                bit_cnt, chunks);
1050
1051         return 0;
1052
1053  err:
1054         printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
1055                bmname(bitmap), ret);
1056         return ret;
1057 }
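/*
 * Example of the per-page read size above (illustrative numbers, assuming
 * 4 KiB pages): with store->bytes == 12756 and store->file_pages == 4, the
 * last page (index 3) is read with count = 12756 - 3 * 4096 = 468 bytes,
 * while every earlier page is read with a full PAGE_SIZE.
 */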
1058
1059 void bitmap_write_all(struct bitmap *bitmap)
1060 {
1061         /* We don't actually write all bitmap blocks here,
1062          * just flag them as needing to be written
1063          */
1064         int i;
1065
1066         if (!bitmap || !bitmap->storage.filemap)
1067                 return;
1068         if (bitmap->storage.file)
1069                 /* Only one copy, so nothing needed */
1070                 return;
1071
1072         for (i = 0; i < bitmap->storage.file_pages; i++)
1073                 set_page_attr(bitmap, i,
1074                               BITMAP_PAGE_NEEDWRITE);
1075         bitmap->allclean = 0;
1076 }
1077
1078 static void bitmap_count_page(struct bitmap_counts *bitmap,
1079                               sector_t offset, int inc)
1080 {
1081         sector_t chunk = offset >> bitmap->chunkshift;
1082         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1083         bitmap->bp[page].count += inc;
1084         bitmap_checkfree(bitmap, page);
1085 }
1086
1087 static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
1088 {
1089         sector_t chunk = offset >> bitmap->chunkshift;
1090         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1091         struct bitmap_page *bp = &bitmap->bp[page];
1092
1093         if (!bp->pending)
1094                 bp->pending = 1;
1095 }
1096
1097 static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1098                                             sector_t offset, sector_t *blocks,
1099                                             int create);
1100
1101 /*
1102  * bitmap daemon -- periodically wakes up to clean bits and flush pages
1103  *                      out to disk
1104  */
1105
1106 void bitmap_daemon_work(struct mddev *mddev)
1107 {
1108         struct bitmap *bitmap;
1109         unsigned long j;
1110         unsigned long nextpage;
1111         sector_t blocks;
1112         struct bitmap_counts *counts;
1113
1114         /* Use a mutex to guard daemon_work against
1115          * bitmap_destroy.
1116          */
1117         mutex_lock(&mddev->bitmap_info.mutex);
1118         bitmap = mddev->bitmap;
1119         if (bitmap == NULL) {
1120                 mutex_unlock(&mddev->bitmap_info.mutex);
1121                 return;
1122         }
1123         if (time_before(jiffies, bitmap->daemon_lastrun
1124                         + mddev->bitmap_info.daemon_sleep))
1125                 goto done;
1126
1127         bitmap->daemon_lastrun = jiffies;
1128         if (bitmap->allclean) {
1129                 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1130                 goto done;
1131         }
1132         bitmap->allclean = 1;
1133
1134         /* Any file-page which is PENDING now needs to be written.
1135          * So set NEEDWRITE now, then after we make any last-minute changes
1136          * we will write it.
1137          */
1138         for (j = 0; j < bitmap->storage.file_pages; j++)
1139                 if (test_and_clear_page_attr(bitmap, j,
1140                                              BITMAP_PAGE_PENDING))
1141                         set_page_attr(bitmap, j,
1142                                       BITMAP_PAGE_NEEDWRITE);
1143
1144         if (bitmap->need_sync &&
1145             mddev->bitmap_info.external == 0) {
1146                 /* Arrange for superblock update as well as
1147                  * other changes */
1148                 bitmap_super_t *sb;
1149                 bitmap->need_sync = 0;
1150                 if (bitmap->storage.filemap) {
1151                         sb = kmap_atomic(bitmap->storage.sb_page);
1152                         sb->events_cleared =
1153                                 cpu_to_le64(bitmap->events_cleared);
1154                         kunmap_atomic(sb);
1155                         set_page_attr(bitmap, 0,
1156                                       BITMAP_PAGE_NEEDWRITE);
1157                 }
1158         }
1159         /* Now look at the bitmap counters and if any are '2' or '1',
1160          * decrement and handle accordingly.
1161          */
1162         counts = &bitmap->counts;
1163         spin_lock_irq(&counts->lock);
1164         nextpage = 0;
1165         for (j = 0; j < counts->chunks; j++) {
1166                 bitmap_counter_t *bmc;
1167                 sector_t  block = (sector_t)j << counts->chunkshift;
1168
1169                 if (j == nextpage) {
1170                         nextpage += PAGE_COUNTER_RATIO;
1171                         if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
1172                                 j |= PAGE_COUNTER_MASK;
1173                                 continue;
1174                         }
1175                         counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
1176                 }
1177                 bmc = bitmap_get_counter(counts,
1178                                          block,
1179                                          &blocks, 0);
1180
1181                 if (!bmc) {
1182                         j |= PAGE_COUNTER_MASK;
1183                         continue;
1184                 }
1185                 if (*bmc == 1 && !bitmap->need_sync) {
1186                         /* We can clear the bit */
1187                         *bmc = 0;
1188                         bitmap_count_page(counts, block, -1);
1189                         bitmap_file_clear_bit(bitmap, block);
1190                 } else if (*bmc && *bmc <= 2) {
1191                         *bmc = 1;
1192                         bitmap_set_pending(counts, block);
1193                         bitmap->allclean = 0;
1194                 }
1195         }
1196         spin_unlock_irq(&counts->lock);
1197
1198         /* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
1199          * DIRTY pages need to be written by bitmap_unplug so it can wait
1200          * for them.
1201          * If we find any DIRTY page we stop there and let bitmap_unplug
1202          * handle all the rest.  This is important in the case where
1203          * the first blocking holds the superblock and it has been updated.
1204          * the first blocking page holds the superblock and it has been updated.
1205          */
1206         for (j = 0;
1207              j < bitmap->storage.file_pages
1208                      && !test_bit(BITMAP_STALE, &bitmap->flags);
1209              j++) {
1210
1211                 if (test_page_attr(bitmap, j,
1212                                    BITMAP_PAGE_DIRTY))
1213                         /* bitmap_unplug will handle the rest */
1214                         break;
1215                 if (test_and_clear_page_attr(bitmap, j,
1216                                              BITMAP_PAGE_NEEDWRITE)) {
1217                         write_page(bitmap, bitmap->storage.filemap[j], 0);
1218                 }
1219         }
1220
1221  done:
1222         if (bitmap->allclean == 0)
1223                 mddev->thread->timeout =
1224                         mddev->bitmap_info.daemon_sleep;
1225         mutex_unlock(&mddev->bitmap_info.mutex);
1226 }
1227
1228 static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1229                                             sector_t offset, sector_t *blocks,
1230                                             int create)
1231 __releases(bitmap->lock)
1232 __acquires(bitmap->lock)
1233 {
1234         /* If 'create', we might release the lock and reclaim it.
1235          * The lock must have been taken with interrupts enabled.
1236          * If !create, we don't release the lock.
1237          */
1238         sector_t chunk = offset >> bitmap->chunkshift;
1239         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1240         unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1241         sector_t csize;
1242         int err;
1243
1244         err = bitmap_checkpage(bitmap, page, create);
1245
1246         if (bitmap->bp[page].hijacked ||
1247             bitmap->bp[page].map == NULL)
1248                 csize = ((sector_t)1) << (bitmap->chunkshift +
1249                                           PAGE_COUNTER_SHIFT - 1);
1250         else
1251                 csize = ((sector_t)1) << bitmap->chunkshift;
1252         *blocks = csize - (offset & (csize - 1));
1253
1254         if (err < 0)
1255                 return NULL;
1256
1257         /* now locked ... */
1258
1259         if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1260                 /* should we use the first or second counter field
1261                  * of the hijacked pointer? */
1262                 int hi = (pageoff > PAGE_COUNTER_MASK);
1263                 return  &((bitmap_counter_t *)
1264                           &bitmap->bp[page].map)[hi];
1265         } else /* page is allocated */
1266                 return (bitmap_counter_t *)
1267                         &(bitmap->bp[page].map[pageoff]);
1268 }
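/*
 * Hijacked-page sketch (illustrative, assuming 4 KiB pages and the 16-bit
 * bitmap_counter_t from bitmap.h): a normally allocated counter page holds
 * PAGE_SIZE / 2 == 2048 counters.  If bitmap_checkpage() could not allocate
 * the page and set bp[page].hijacked instead, the storage of the
 * bp[page].map pointer itself is reused as just two counters: index 0
 * covers the lower 1024 chunks of the page's range and index 1 the upper
 * 1024, which is what the "pageoff > PAGE_COUNTER_MASK" test selects.
 * *blocks is widened to match, since csize then spans
 * chunkshift + PAGE_COUNTER_SHIFT - 1 bits.
 */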
1269
1270 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
1271 {
1272         if (!bitmap)
1273                 return 0;
1274
1275         if (behind) {
1276                 int bw;
1277                 atomic_inc(&bitmap->behind_writes);
1278                 bw = atomic_read(&bitmap->behind_writes);
1279                 if (bw > bitmap->behind_writes_used)
1280                         bitmap->behind_writes_used = bw;
1281
1282                 pr_debug("inc write-behind count %d/%lu\n",
1283                          bw, bitmap->mddev->bitmap_info.max_write_behind);
1284         }
1285
1286         while (sectors) {
1287                 sector_t blocks;
1288                 bitmap_counter_t *bmc;
1289
1290                 spin_lock_irq(&bitmap->counts.lock);
1291                 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
1292                 if (!bmc) {
1293                         spin_unlock_irq(&bitmap->counts.lock);
1294                         return 0;
1295                 }
1296
1297                 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
1298                         DEFINE_WAIT(__wait);
1299                         /* note that it is safe to do the prepare_to_wait
1300                          * after the test as long as we do it before dropping
1301                          * the spinlock.
1302                          */
1303                         prepare_to_wait(&bitmap->overflow_wait, &__wait,
1304                                         TASK_UNINTERRUPTIBLE);
1305                         spin_unlock_irq(&bitmap->counts.lock);
1306                         schedule();
1307                         finish_wait(&bitmap->overflow_wait, &__wait);
1308                         continue;
1309                 }
1310
1311                 switch (*bmc) {
1312                 case 0:
1313                         bitmap_file_set_bit(bitmap, offset);
1314                         bitmap_count_page(&bitmap->counts, offset, 1);
1315                         /* fall through */
1316                 case 1:
1317                         *bmc = 2;
1318                 }
1319
1320                 (*bmc)++;
1321
1322                 spin_unlock_irq(&bitmap->counts.lock);
1323
1324                 offset += blocks;
1325                 if (sectors > blocks)
1326                         sectors -= blocks;
1327                 else
1328                         sectors = 0;
1329         }
1330         return 0;
1331 }
1332 EXPORT_SYMBOL(bitmap_startwrite);
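/*
 * Counter lifecycle sketch for a single chunk, following the code in
 * bitmap_startwrite() above, bitmap_endwrite() below and
 * bitmap_daemon_work() (NEEDED and RESYNC flag bits ignored for brevity):
 *
 *   idle                   counter == 0, on-disk bit clear
 *   first write starts     counter -> 3, bitmap_file_set_bit() sets the bit
 *   that write completes   counter -> 2, chunk flagged for the daemon
 *   daemon pass 1          counter -> 1
 *   daemon pass 2          counter -> 0, bitmap_file_clear_bit() clears it
 *
 * Additional in-flight writes simply raise the counter further, so while
 * the bit is set the counter equals the number of active writes plus two,
 * and the two daemon passes give recently written chunks a grace period
 * before the on-disk bit is cleared.
 */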
1333
1334 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1335                      int success, int behind)
1336 {
1337         if (!bitmap)
1338                 return;
1339         if (behind) {
1340                 if (atomic_dec_and_test(&bitmap->behind_writes))
1341                         wake_up(&bitmap->behind_wait);
1342                 pr_debug("dec write-behind count %d/%lu\n",
1343                          atomic_read(&bitmap->behind_writes),
1344                          bitmap->mddev->bitmap_info.max_write_behind);
1345         }
1346
1347         while (sectors) {
1348                 sector_t blocks;
1349                 unsigned long flags;
1350                 bitmap_counter_t *bmc;
1351
1352                 spin_lock_irqsave(&bitmap->counts.lock, flags);
1353                 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
1354                 if (!bmc) {
1355                         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1356                         return;
1357                 }
1358
1359                 if (success && !bitmap->mddev->degraded &&
1360                     bitmap->events_cleared < bitmap->mddev->events) {
1361                         bitmap->events_cleared = bitmap->mddev->events;
1362                         bitmap->need_sync = 1;
1363                         sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
1364                 }
1365
1366                 if (!success && !NEEDED(*bmc))
1367                         *bmc |= NEEDED_MASK;
1368
1369                 if (COUNTER(*bmc) == COUNTER_MAX)
1370                         wake_up(&bitmap->overflow_wait);
1371
1372                 (*bmc)--;
1373                 if (*bmc <= 2) {
1374                         bitmap_set_pending(&bitmap->counts, offset);
1375                         bitmap->allclean = 0;
1376                 }
1377                 spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1378                 offset += blocks;
1379                 if (sectors > blocks)
1380                         sectors -= blocks;
1381                 else
1382                         sectors = 0;
1383         }
1384 }
1385 EXPORT_SYMBOL(bitmap_endwrite);
1386
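/*
 * __bitmap_start_sync() reports whether the chunk at 'offset' needs to
 * be resynced and sets *blocks to the number of blocks covered by that
 * counter.  If the chunk is marked NEEDED and the array is not degraded,
 * the counter is switched from NEEDED to RESYNC to record that a resync
 * of this chunk is now in progress.  With no bitmap at all, everything
 * is reported as needing resync.
 */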
1387 static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1388                                int degraded)
1389 {
1390         bitmap_counter_t *bmc;
1391         int rv;
1392         if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1393                 *blocks = 1024;
1394                 return 1; /* always resync if no bitmap */
1395         }
1396         spin_lock_irq(&bitmap->counts.lock);
1397         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1398         rv = 0;
1399         if (bmc) {
1400                 /* locked */
1401                 if (RESYNC(*bmc))
1402                         rv = 1;
1403                 else if (NEEDED(*bmc)) {
1404                         rv = 1;
1405                         if (!degraded) { /* don't set/clear bits if degraded */
1406                                 *bmc |= RESYNC_MASK;
1407                                 *bmc &= ~NEEDED_MASK;
1408                         }
1409                 }
1410         }
1411         spin_unlock_irq(&bitmap->counts.lock);
1412         return rv;
1413 }
1414
1415 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1416                       int degraded)
1417 {
1418         /* bitmap_start_sync must always report on multiples of whole
1419          * pages, otherwise resync (which is very PAGE_SIZE based) will
1420          * get confused.
1421          * So call __bitmap_start_sync repeatedly (if needed) until
1422          * at least PAGE_SIZE>>9 blocks are covered.
1423          * Return the 'or' of the results.
1424          */
1425         int rv = 0;
1426         sector_t blocks1;
1427
1428         *blocks = 0;
1429         while (*blocks < (PAGE_SIZE>>9)) {
1430                 rv |= __bitmap_start_sync(bitmap, offset,
1431                                           &blocks1, degraded);
1432                 offset += blocks1;
1433                 *blocks += blocks1;
1434         }
1435         return rv;
1436 }
1437 EXPORT_SYMBOL(bitmap_start_sync);
1438
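/*
 * bitmap_end_sync() marks the resync of one chunk as finished: the
 * RESYNC flag is cleared, and if the sync was aborted the chunk is
 * flagged NEEDED again so it will be retried; otherwise an idle counter
 * (<= 2) is queued to have its on-disk bit cleared.
 */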
1439 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
1440 {
1441         bitmap_counter_t *bmc;
1442         unsigned long flags;
1443
1444         if (bitmap == NULL) {
1445                 *blocks = 1024;
1446                 return;
1447         }
1448         spin_lock_irqsave(&bitmap->counts.lock, flags);
1449         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1450         if (bmc == NULL)
1451                 goto unlock;
1452         /* locked */
1453         if (RESYNC(*bmc)) {
1454                 *bmc &= ~RESYNC_MASK;
1455
1456                 if (!NEEDED(*bmc) && aborted)
1457                         *bmc |= NEEDED_MASK;
1458                 else {
1459                         if (*bmc <= 2) {
1460                                 bitmap_set_pending(&bitmap->counts, offset);
1461                                 bitmap->allclean = 0;
1462                         }
1463                 }
1464         }
1465  unlock:
1466         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1467 }
1468 EXPORT_SYMBOL(bitmap_end_sync);
1469
1470 void bitmap_close_sync(struct bitmap *bitmap)
1471 {
1472         /* Sync has finished, and any bitmap chunks that weren't synced
1473          * properly have been aborted.  It remains for us to clear the
1474          * RESYNC bit wherever it is still set.
1475          */
1476         sector_t sector = 0;
1477         sector_t blocks;
1478         if (!bitmap)
1479                 return;
1480         while (sector < bitmap->mddev->resync_max_sectors) {
1481                 bitmap_end_sync(bitmap, sector, &blocks, 0);
1482                 sector += blocks;
1483         }
1484 }
1485 EXPORT_SYMBOL(bitmap_close_sync);
1486
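/*
 * bitmap_cond_end_sync() is called on the resync path to periodically
 * (at most once per daemon_sleep) declare everything below 'sector' as
 * cleanly synced, so those bits can be cleared without waiting for the
 * whole resync to finish.
 */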
1487 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1488 {
1489         sector_t s = 0;
1490         sector_t blocks;
1491
1492         if (!bitmap)
1493                 return;
1494         if (sector == 0) {
1495                 bitmap->last_end_sync = jiffies;
1496                 return;
1497         }
1498         if (time_before(jiffies, (bitmap->last_end_sync
1499                                   + bitmap->mddev->bitmap_info.daemon_sleep)))
1500                 return;
1501         wait_event(bitmap->mddev->recovery_wait,
1502                    atomic_read(&bitmap->mddev->recovery_active) == 0);
1503
1504         bitmap->mddev->curr_resync_completed = sector;
1505         set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
1506         sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
1507         s = 0;
1508         while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1509                 bitmap_end_sync(bitmap, s, &blocks, 0);
1510                 s += blocks;
1511         }
1512         bitmap->last_end_sync = jiffies;
1513         sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
1514 }
1515 EXPORT_SYMBOL(bitmap_cond_end_sync);
1516
1517 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1518 {
1519         /* For each chunk covered by any of these sectors, set the
1520          * counter to 2 and possibly set resync_needed.  The counters
1521          * should all be 0 at this point.
1522          */
1523
1524         sector_t secs;
1525         bitmap_counter_t *bmc;
1526         spin_lock_irq(&bitmap->counts.lock);
1527         bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
1528         if (!bmc) {
1529                 spin_unlock_irq(&bitmap->counts.lock);
1530                 return;
1531         }
1532         if (!*bmc) {
1533                 *bmc = 2 | (needed ? NEEDED_MASK : 0);
1534                 bitmap_count_page(&bitmap->counts, offset, 1);
1535                 bitmap_set_pending(&bitmap->counts, offset);
1536                 bitmap->allclean = 0;
1537         }
1538         spin_unlock_irq(&bitmap->counts.lock);
1539 }
1540
1541 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
1542 void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
1543 {
1544         unsigned long chunk;
1545
1546         for (chunk = s; chunk <= e; chunk++) {
1547                 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1548                 bitmap_set_memory_bits(bitmap, sec, 1);
1549                 bitmap_file_set_bit(bitmap, sec);
1550                 if (sec < bitmap->mddev->recovery_cp)
1551                         /* We are asserting that the array is dirty,
1552                          * so move the recovery_cp address back so
1553                          * that it is obvious that the array is dirty
1554                          */
1555                         bitmap->mddev->recovery_cp = sec;
1556         }
1557 }
1558
1559 /*
1560  * flush out any pending updates
1561  */
1562 void bitmap_flush(struct mddev *mddev)
1563 {
1564         struct bitmap *bitmap = mddev->bitmap;
1565         long sleep;
1566
1567         if (!bitmap) /* there was no bitmap */
1568                 return;
1569
1570         /* run the daemon_work three times to ensure everything
1571          * that can be flushed is flushed
1572          */
1573         sleep = mddev->bitmap_info.daemon_sleep * 2;
1574         bitmap->daemon_lastrun -= sleep;
1575         bitmap_daemon_work(mddev);
1576         bitmap->daemon_lastrun -= sleep;
1577         bitmap_daemon_work(mddev);
1578         bitmap->daemon_lastrun -= sleep;
1579         bitmap_daemon_work(mddev);
1580         bitmap_update_sb(bitmap);
1581 }
1582
1583 /*
1584  * free memory that was allocated
1585  */
1586 static void bitmap_free(struct bitmap *bitmap)
1587 {
1588         unsigned long k, pages;
1589         struct bitmap_page *bp;
1590
1591         if (!bitmap) /* there was no bitmap */
1592                 return;
1593
1594         /* Shouldn't be needed - but just in case.... */
1595         wait_event(bitmap->write_wait,
1596                    atomic_read(&bitmap->pending_writes) == 0);
1597
1598         /* release the bitmap file  */
1599         bitmap_file_unmap(&bitmap->storage);
1600
1601         bp = bitmap->counts.bp;
1602         pages = bitmap->counts.pages;
1603
1604         /* free all allocated memory */
1605
1606         if (bp) /* deallocate the page memory */
1607                 for (k = 0; k < pages; k++)
1608                         if (bp[k].map && !bp[k].hijacked)
1609                                 kfree(bp[k].map);
1610         kfree(bp);
1611         kfree(bitmap);
1612 }
1613
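/*
 * bitmap_destroy() detaches the bitmap from the mddev (under
 * bitmap_info.mutex), disables the daemon timeout, drops the
 * 'can_clear' sysfs dirent and frees all bitmap memory.
 */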
1614 void bitmap_destroy(struct mddev *mddev)
1615 {
1616         struct bitmap *bitmap = mddev->bitmap;
1617
1618         if (!bitmap) /* there was no bitmap */
1619                 return;
1620
1621         mutex_lock(&mddev->bitmap_info.mutex);
1622         mddev->bitmap = NULL; /* disconnect from the md device */
1623         mutex_unlock(&mddev->bitmap_info.mutex);
1624         if (mddev->thread)
1625                 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1626
1627         if (bitmap->sysfs_can_clear)
1628                 sysfs_put(bitmap->sysfs_can_clear);
1629
1630         bitmap_free(bitmap);
1631 }
1632
1633 /*
1634  * initialize the bitmap structure
1635  * if this returns an error, bitmap_destroy must be called to do clean up
1636  */
1637 int bitmap_create(struct mddev *mddev)
1638 {
1639         struct bitmap *bitmap;
1640         sector_t blocks = mddev->resync_max_sectors;
1641         struct file *file = mddev->bitmap_info.file;
1642         int err;
1643         struct kernfs_node *bm = NULL;
1644
1645         BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1646
1647         BUG_ON(file && mddev->bitmap_info.offset);
1648
1649         bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1650         if (!bitmap)
1651                 return -ENOMEM;
1652
1653         spin_lock_init(&bitmap->counts.lock);
1654         atomic_set(&bitmap->pending_writes, 0);
1655         init_waitqueue_head(&bitmap->write_wait);
1656         init_waitqueue_head(&bitmap->overflow_wait);
1657         init_waitqueue_head(&bitmap->behind_wait);
1658
1659         bitmap->mddev = mddev;
1660
1661         if (mddev->kobj.sd)
1662                 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1663         if (bm) {
1664                 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1665                 sysfs_put(bm);
1666         } else
1667                 bitmap->sysfs_can_clear = NULL;
1668
1669         bitmap->storage.file = file;
1670         if (file) {
1671                 get_file(file);
1672                 /* As future accesses to this file will use bmap,
1673                  * and bypass the page cache, we must sync the file
1674                  * first.
1675                  */
1676                 vfs_fsync(file, 1);
1677         }
1678         /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1679         if (!mddev->bitmap_info.external) {
1680                 /*
1681                  * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
1682                  * instructing us to create a new on-disk bitmap instance.
1683                  */
1684                 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
1685                         err = bitmap_new_disk_sb(bitmap);
1686                 else
1687                         err = bitmap_read_sb(bitmap);
1688         } else {
1689                 err = 0;
1690                 if (mddev->bitmap_info.chunksize == 0 ||
1691                     mddev->bitmap_info.daemon_sleep == 0)
1692                         /* chunksize and time_base need to be
1693                          * set first. */
1694                         err = -EINVAL;
1695         }
1696         if (err)
1697                 goto error;
1698
1699         bitmap->daemon_lastrun = jiffies;
1700         err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
1701         if (err)
1702                 goto error;
1703
1704         printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1705                bitmap->counts.pages, bmname(bitmap));
1706
1707         mddev->bitmap = bitmap;
1708         return test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
1709
1710  error:
1711         bitmap_free(bitmap);
1712         return err;
1713 }
1714
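/*
 * bitmap_load() populates the in-memory counters from the on-disk or
 * file-backed bitmap that bitmap_create() set up.  Stale in-memory state
 * is cleared first, recovery is kicked in case any bits were set, and
 * the bitmap daemon timeout is armed.
 */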
1715 int bitmap_load(struct mddev *mddev)
1716 {
1717         int err = 0;
1718         sector_t start = 0;
1719         sector_t sector = 0;
1720         struct bitmap *bitmap = mddev->bitmap;
1721
1722         if (!bitmap)
1723                 goto out;
1724
1725         /* Clear out old bitmap info first:  Either there is none, or we
1726          * are resuming after someone else has possibly changed things,
1727          * so we should forget old cached info.
1728          * All chunks should be clean, but some might need_sync.
1729          */
1730         while (sector < mddev->resync_max_sectors) {
1731                 sector_t blocks;
1732                 bitmap_start_sync(bitmap, sector, &blocks, 0);
1733                 sector += blocks;
1734         }
1735         bitmap_close_sync(bitmap);
1736
1737         if (mddev->degraded == 0
1738             || bitmap->events_cleared == mddev->events)
1739                 /* no need to keep dirty bits to optimise a
1740                  * re-add of a missing device */
1741                 start = mddev->recovery_cp;
1742
1743         mutex_lock(&mddev->bitmap_info.mutex);
1744         err = bitmap_init_from_disk(bitmap, start);
1745         mutex_unlock(&mddev->bitmap_info.mutex);
1746
1747         if (err)
1748                 goto out;
1749         clear_bit(BITMAP_STALE, &bitmap->flags);
1750
1751         /* Kick recovery in case any bits were set */
1752         set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
1753
1754         mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1755         md_wakeup_thread(mddev->thread);
1756
1757         bitmap_update_sb(bitmap);
1758
1759         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1760                 err = -EIO;
1761 out:
1762         return err;
1763 }
1764 EXPORT_SYMBOL_GPL(bitmap_load);
1765
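/*
 * bitmap_status() emits the one-line "bitmap: ..." summary seen in
 * /proc/mdstat: allocated/total counter pages, their memory footprint,
 * the chunk size and, for file-backed bitmaps, the path of the backing
 * file.
 */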
1766 void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
1767 {
1768         unsigned long chunk_kb;
1769         struct bitmap_counts *counts;
1770
1771         if (!bitmap)
1772                 return;
1773
1774         counts = &bitmap->counts;
1775
1776         chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
1777         seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
1778                    "%lu%s chunk",
1779                    counts->pages - counts->missing_pages,
1780                    counts->pages,
1781                    (counts->pages - counts->missing_pages)
1782                    << (PAGE_SHIFT - 10),
1783                    chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
1784                    chunk_kb ? "KB" : "B");
1785         if (bitmap->storage.file) {
1786                 seq_printf(seq, ", file: ");
1787                 seq_path(seq, &bitmap->storage.file->f_path, " \t\n");
1788         }
1789
1790         seq_printf(seq, "\n");
1791 }
1792
1793 int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
1794                   int chunksize, int init)
1795 {
1796         /* If chunksize is 0, choose an appropriate chunk size.
1797          * Then possibly allocate new storage space.
1798          * Then quiesce, copy bits, replace the bitmap, and re-start.
1799          *
1800          * This function is called both to set up the initial bitmap
1801          * and to resize the bitmap while the array is active.
1802          * When the resize is a result of the array being resized,
1803          * chunksize will be zero, and we need to choose a suitable
1804          * chunksize; otherwise we use the chunksize we are given.
1805          */
1806         struct bitmap_storage store;
1807         struct bitmap_counts old_counts;
1808         unsigned long chunks;
1809         sector_t block;
1810         sector_t old_blocks, new_blocks;
1811         int chunkshift;
1812         int ret = 0;
1813         long pages;
1814         struct bitmap_page *new_bp;
1815
1816         if (chunksize == 0) {
1817                 /* If there is enough space, leave the chunk size unchanged,
1818                  * else double it until there is enough space.
1819                  */
1820                 long bytes;
1821                 long space = bitmap->mddev->bitmap_info.space;
1822
1823                 if (space == 0) {
1824                         /* We don't know how much space there is, so limit
1825                          * it to the current size, in sectors.
1826                          */
1827                         bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
1828                         if (!bitmap->mddev->bitmap_info.external)
1829                                 bytes += sizeof(bitmap_super_t);
1830                         space = DIV_ROUND_UP(bytes, 512);
1831                         bitmap->mddev->bitmap_info.space = space;
1832                 }
1833                 chunkshift = bitmap->counts.chunkshift;
1834                 chunkshift--;
1835                 do {
1836                         /* 'chunkshift' is the shift from block size to chunk size */
1837                         chunkshift++;
1838                         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1839                         bytes = DIV_ROUND_UP(chunks, 8);
1840                         if (!bitmap->mddev->bitmap_info.external)
1841                                 bytes += sizeof(bitmap_super_t);
1842                 } while (bytes > (space << 9));
1843         } else
1844                 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
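		/* The chunksize passed in is expected to be a power of two
		 * (chunksize_store() enforces this), so ffz(~chunksize) above
		 * is simply log2(chunksize); e.g. a 64KB chunk with 512-byte
		 * bitmap blocks (BITMAP_BLOCK_SHIFT == 9 per bitmap.h) gives
		 * chunkshift == 16 - 9 == 7.
		 */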
1845
1846         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1847         memset(&store, 0, sizeof(store));
1848         if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
1849                 ret = bitmap_storage_alloc(&store, chunks,
1850                                            !bitmap->mddev->bitmap_info.external);
1851         if (ret)
1852                 goto err;
1853
1854         pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
1855
1856         new_bp = kzalloc(pages * sizeof(*new_bp), GFP_KERNEL);
1857         ret = -ENOMEM;
1858         if (!new_bp) {
1859                 bitmap_file_unmap(&store);
1860                 goto err;
1861         }
1862
1863         if (!init)
1864                 bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
1865
1866         store.file = bitmap->storage.file;
1867         bitmap->storage.file = NULL;
1868
1869         if (store.sb_page && bitmap->storage.sb_page)
1870                 memcpy(page_address(store.sb_page),
1871                        page_address(bitmap->storage.sb_page),
1872                        sizeof(bitmap_super_t));
1873         bitmap_file_unmap(&bitmap->storage);
1874         bitmap->storage = store;
1875
1876         old_counts = bitmap->counts;
1877         bitmap->counts.bp = new_bp;
1878         bitmap->counts.pages = pages;
1879         bitmap->counts.missing_pages = pages;
1880         bitmap->counts.chunkshift = chunkshift;
1881         bitmap->counts.chunks = chunks;
1882         bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
1883                                                      BITMAP_BLOCK_SHIFT);
1884
1885         blocks = min(old_counts.chunks << old_counts.chunkshift,
1886                      chunks << chunkshift);
1887
1888         spin_lock_irq(&bitmap->counts.lock);
1889         for (block = 0; block < blocks; ) {
1890                 bitmap_counter_t *bmc_old, *bmc_new;
1891                 int set;
1892
1893                 bmc_old = bitmap_get_counter(&old_counts, block,
1894                                              &old_blocks, 0);
1895                 set = bmc_old && NEEDED(*bmc_old);
1896
1897                 if (set) {
1898                         bmc_new = bitmap_get_counter(&bitmap->counts, block,
1899                                                      &new_blocks, 1);
1900                         if (*bmc_new == 0) {
1901                                 /* need to set on-disk bits too. */
1902                                 sector_t end = block + new_blocks;
1903                                 sector_t start = block >> chunkshift;
1904                                 start <<= chunkshift;
1905                                 while (start < end) {
1906                                         bitmap_file_set_bit(bitmap, block);
1907                                         start += 1 << chunkshift;
1908                                 }
1909                                 *bmc_new = 2;
1910                                 bitmap_count_page(&bitmap->counts,
1911                                                   block, 1);
1912                                 bitmap_set_pending(&bitmap->counts,
1913                                                    block);
1914                         }
1915                         *bmc_new |= NEEDED_MASK;
1916                         if (new_blocks < old_blocks)
1917                                 old_blocks = new_blocks;
1918                 }
1919                 block += old_blocks;
1920         }
1921
1922         if (!init) {
1923                 int i;
1924                 while (block < (chunks << chunkshift)) {
1925                         bitmap_counter_t *bmc;
1926                         bmc = bitmap_get_counter(&bitmap->counts, block,
1927                                                  &new_blocks, 1);
1928                         if (bmc) {
1929                                 /* new space.  It needs to be resynced, so
1930                                  * we set NEEDED_MASK.
1931                                  */
1932                                 if (*bmc == 0) {
1933                                         *bmc = NEEDED_MASK | 2;
1934                                         bitmap_count_page(&bitmap->counts,
1935                                                           block, 1);
1936                                         bitmap_set_pending(&bitmap->counts,
1937                                                            block);
1938                                 }
1939                         }
1940                         block += new_blocks;
1941                 }
1942                 for (i = 0; i < bitmap->storage.file_pages; i++)
1943                         set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1944         }
1945         spin_unlock_irq(&bitmap->counts.lock);
1946
1947         if (!init) {
1948                 bitmap_unplug(bitmap);
1949                 bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
1950         }
1951         ret = 0;
1952 err:
1953         return ret;
1954 }
1955 EXPORT_SYMBOL_GPL(bitmap_resize);
1956
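/* 'bitmap/location' reports where the bitmap lives: "file" for a
 * file-backed bitmap, a signed offset for an internal bitmap, or "none".
 * Writing "none" tears down any existing bitmap; writing an offset while
 * none is configured records it and, on an active array, creates and
 * loads the bitmap there.
 */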
1957 static ssize_t
1958 location_show(struct mddev *mddev, char *page)
1959 {
1960         ssize_t len;
1961         if (mddev->bitmap_info.file)
1962                 len = sprintf(page, "file");
1963         else if (mddev->bitmap_info.offset)
1964                 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
1965         else
1966                 len = sprintf(page, "none");
1967         len += sprintf(page+len, "\n");
1968         return len;
1969 }
1970
1971 static ssize_t
1972 location_store(struct mddev *mddev, const char *buf, size_t len)
1973 {
1974
1975         if (mddev->pers) {
1976                 if (!mddev->pers->quiesce)
1977                         return -EBUSY;
1978                 if (mddev->recovery || mddev->sync_thread)
1979                         return -EBUSY;
1980         }
1981
1982         if (mddev->bitmap || mddev->bitmap_info.file ||
1983             mddev->bitmap_info.offset) {
1984                 /* bitmap already configured.  Only option is to clear it */
1985                 if (strncmp(buf, "none", 4) != 0)
1986                         return -EBUSY;
1987                 if (mddev->pers) {
1988                         mddev->pers->quiesce(mddev, 1);
1989                         bitmap_destroy(mddev);
1990                         mddev->pers->quiesce(mddev, 0);
1991                 }
1992                 mddev->bitmap_info.offset = 0;
1993                 if (mddev->bitmap_info.file) {
1994                         struct file *f = mddev->bitmap_info.file;
1995                         mddev->bitmap_info.file = NULL;
1996                         fput(f);
1997                 }
1998         } else {
1999                 /* No bitmap, OK to set a location */
2000                 long long offset;
2001                 if (strncmp(buf, "none", 4) == 0)
2002                         /* nothing to be done */;
2003                 else if (strncmp(buf, "file:", 5) == 0) {
2004                         /* Not supported yet */
2005                         return -EINVAL;
2006                 } else {
2007                         int rv;
2008                         if (buf[0] == '+')
2009                                 rv = kstrtoll(buf+1, 10, &offset);
2010                         else
2011                                 rv = kstrtoll(buf, 10, &offset);
2012                         if (rv)
2013                                 return rv;
2014                         if (offset == 0)
2015                                 return -EINVAL;
2016                         if (mddev->bitmap_info.external == 0 &&
2017                             mddev->major_version == 0 &&
2018                             offset != mddev->bitmap_info.default_offset)
2019                                 return -EINVAL;
2020                         mddev->bitmap_info.offset = offset;
2021                         if (mddev->pers) {
2022                                 mddev->pers->quiesce(mddev, 1);
2023                                 rv = bitmap_create(mddev);
2024                                 if (!rv)
2025                                         rv = bitmap_load(mddev);
2026                                 if (rv) {
2027                                         bitmap_destroy(mddev);
2028                                         mddev->bitmap_info.offset = 0;
2029                                 }
2030                                 mddev->pers->quiesce(mddev, 0);
2031                                 if (rv)
2032                                         return rv;
2033                         }
2034                 }
2035         }
2036         if (!mddev->external) {
2037                 /* Ensure new bitmap info is stored in
2038                  * metadata promptly.
2039                  */
2040                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2041                 md_wakeup_thread(mddev->thread);
2042         }
2043         return len;
2044 }
2045
2046 static struct md_sysfs_entry bitmap_location =
2047 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
2048
2049 /* 'bitmap/space' is the space available at 'location' for the
2050  * bitmap.  This allows the kernel to know when it is safe to
2051  * resize the bitmap to match a resized array.
2052  */
2053 static ssize_t
2054 space_show(struct mddev *mddev, char *page)
2055 {
2056         return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2057 }
2058
2059 static ssize_t
2060 space_store(struct mddev *mddev, const char *buf, size_t len)
2061 {
2062         unsigned long sectors;
2063         int rv;
2064
2065         rv = kstrtoul(buf, 10, &sectors);
2066         if (rv)
2067                 return rv;
2068
2069         if (sectors == 0)
2070                 return -EINVAL;
2071
2072         if (mddev->bitmap &&
2073             sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
2074                 return -EFBIG; /* Bitmap is too big for this small space */
2075
2076         /* could make sure it isn't too big, but that isn't really
2077          * needed - user-space should be careful.
2078          */
2079         mddev->bitmap_info.space = sectors;
2080         return len;
2081 }
2082
2083 static struct md_sysfs_entry bitmap_space =
2084 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2085
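/* 'bitmap/time_base' is the bitmap daemon's wake-up period in seconds;
 * fractional values are accepted and converted to jiffies internally.
 */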
2086 static ssize_t
2087 timeout_show(struct mddev *mddev, char *page)
2088 {
2089         ssize_t len;
2090         unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2091         unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2092
2093         len = sprintf(page, "%lu", secs);
2094         if (jifs)
2095                 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2096         len += sprintf(page+len, "\n");
2097         return len;
2098 }
2099
2100 static ssize_t
2101 timeout_store(struct mddev *mddev, const char *buf, size_t len)
2102 {
2103         /* timeout can be set at any time */
2104         unsigned long timeout;
2105         int rv = strict_strtoul_scaled(buf, &timeout, 4);
2106         if (rv)
2107                 return rv;
2108
2109         /* just to make sure we don't overflow... */
2110         if (timeout >= LONG_MAX / HZ)
2111                 return -EINVAL;
2112
2113         timeout = timeout * HZ / 10000;
2114
2115         if (timeout >= MAX_SCHEDULE_TIMEOUT)
2116                 timeout = MAX_SCHEDULE_TIMEOUT-1;
2117         if (timeout < 1)
2118                 timeout = 1;
2119         mddev->bitmap_info.daemon_sleep = timeout;
2120         if (mddev->thread) {
2121                 /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
2122                  * the bitmap is all clean and we don't need to
2123                  * adjust the timeout right now
2124                  */
2125                 if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
2126                         mddev->thread->timeout = timeout;
2127                         md_wakeup_thread(mddev->thread);
2128                 }
2129         }
2130         return len;
2131 }
2132
2133 static struct md_sysfs_entry bitmap_timeout =
2134 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2135
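/* 'bitmap/backlog' is max_write_behind, the cap on the number of
 * write-behind writes that may be outstanding at once (bounded by
 * COUNTER_MAX).
 */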
2136 static ssize_t
2137 backlog_show(struct mddev *mddev, char *page)
2138 {
2139         return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2140 }
2141
2142 static ssize_t
2143 backlog_store(struct mddev *mddev, const char *buf, size_t len)
2144 {
2145         unsigned long backlog;
2146         int rv = kstrtoul(buf, 10, &backlog);
2147         if (rv)
2148                 return rv;
2149         if (backlog > COUNTER_MAX)
2150                 return -EINVAL;
2151         mddev->bitmap_info.max_write_behind = backlog;
2152         return len;
2153 }
2154
2155 static struct md_sysfs_entry bitmap_backlog =
2156 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2157
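/* 'bitmap/chunksize' is the amount of data, in bytes, covered by one
 * bitmap bit.  It must be a power of two of at least 512 and can only
 * be changed while no bitmap is active.
 */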
2158 static ssize_t
2159 chunksize_show(struct mddev *mddev, char *page)
2160 {
2161         return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2162 }
2163
2164 static ssize_t
2165 chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2166 {
2167         /* Can only be changed when no bitmap is active */
2168         int rv;
2169         unsigned long csize;
2170         if (mddev->bitmap)
2171                 return -EBUSY;
2172         rv = kstrtoul(buf, 10, &csize);
2173         if (rv)
2174                 return rv;
2175         if (csize < 512 ||
2176             !is_power_of_2(csize))
2177                 return -EINVAL;
2178         mddev->bitmap_info.chunksize = csize;
2179         return len;
2180 }
2181
2182 static struct md_sysfs_entry bitmap_chunksize =
2183 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2184
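/* 'bitmap/metadata' selects whether the bitmap superblock is managed by
 * the kernel ("internal") or entirely by user-space ("external"); it can
 * only be changed before a bitmap or bitmap location is configured.
 */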
2185 static ssize_t metadata_show(struct mddev *mddev, char *page)
2186 {
2187         return sprintf(page, "%s\n", (mddev->bitmap_info.external
2188                                       ? "external" : "internal"));
2189 }
2190
2191 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2192 {
2193         if (mddev->bitmap ||
2194             mddev->bitmap_info.file ||
2195             mddev->bitmap_info.offset)
2196                 return -EBUSY;
2197         if (strncmp(buf, "external", 8) == 0)
2198                 mddev->bitmap_info.external = 1;
2199         else if (strncmp(buf, "internal", 8) == 0)
2200                 mddev->bitmap_info.external = 0;
2201         else
2202                 return -EINVAL;
2203         return len;
2204 }
2205
2206 static struct md_sysfs_entry bitmap_metadata =
2207 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2208
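/* 'bitmap/can_clear' reports "false" while need_sync is set (the
 * events_cleared update from bitmap_endwrite() has not been acted on
 * yet) and "true" otherwise.  User-space writes "true" to allow bits to
 * be cleared again; that is refused while the array is degraded.
 */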
2209 static ssize_t can_clear_show(struct mddev *mddev, char *page)
2210 {
2211         int len;
2212         if (mddev->bitmap)
2213                 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
2214                                              "false" : "true"));
2215         else
2216                 len = sprintf(page, "\n");
2217         return len;
2218 }
2219
2220 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2221 {
2222         if (mddev->bitmap == NULL)
2223                 return -ENOENT;
2224         if (strncmp(buf, "false", 5) == 0)
2225                 mddev->bitmap->need_sync = 1;
2226         else if (strncmp(buf, "true", 4) == 0) {
2227                 if (mddev->degraded)
2228                         return -EBUSY;
2229                 mddev->bitmap->need_sync = 0;
2230         } else
2231                 return -EINVAL;
2232         return len;
2233 }
2234
2235 static struct md_sysfs_entry bitmap_can_clear =
2236 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2237
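/* 'bitmap/max_backlog_used' reports the largest write-behind backlog
 * seen so far; writing anything to it resets the watermark to zero.
 */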
2238 static ssize_t
2239 behind_writes_used_show(struct mddev *mddev, char *page)
2240 {
2241         if (mddev->bitmap == NULL)
2242                 return sprintf(page, "0\n");
2243         return sprintf(page, "%lu\n",
2244                        mddev->bitmap->behind_writes_used);
2245 }
2246
2247 static ssize_t
2248 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2249 {
2250         if (mddev->bitmap)
2251                 mddev->bitmap->behind_writes_used = 0;
2252         return len;
2253 }
2254
2255 static struct md_sysfs_entry max_backlog_used =
2256 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
2257        behind_writes_used_show, behind_writes_used_reset);
2258
2259 static struct attribute *md_bitmap_attrs[] = {
2260         &bitmap_location.attr,
2261         &bitmap_space.attr,
2262         &bitmap_timeout.attr,
2263         &bitmap_backlog.attr,
2264         &bitmap_chunksize.attr,
2265         &bitmap_metadata.attr,
2266         &bitmap_can_clear.attr,
2267         &max_backlog_used.attr,
2268         NULL
2269 };
2270 struct attribute_group md_bitmap_group = {
2271         .name = "bitmap",
2272         .attrs = md_bitmap_attrs,
2273 };
2274