Use separate bitmaps for each node in the cluster
[firefly-linux-kernel-4.4.55.git] / drivers / md / bitmap.c
1 /*
2  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
3  *
4  * bitmap_create  - sets up the bitmap structure
5  * bitmap_destroy - destroys the bitmap structure
6  *
7  * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8  * - added disk storage for bitmap
9  * - changes to allow various bitmap chunk sizes
10  */
11
12 /*
13  * Still to do:
14  *
15  * flush after percent set rather than just time based. (maybe both).
16  */
17
18 #include <linux/blkdev.h>
19 #include <linux/module.h>
20 #include <linux/errno.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/timer.h>
24 #include <linux/sched.h>
25 #include <linux/list.h>
26 #include <linux/file.h>
27 #include <linux/mount.h>
28 #include <linux/buffer_head.h>
29 #include <linux/seq_file.h>
30 #include "md.h"
31 #include "bitmap.h"
32
33 static inline char *bmname(struct bitmap *bitmap)
34 {
35         return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
36 }
37
38 /*
39  * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
40  *
41  * 1) check to see if this page is allocated, if it's not then try to alloc
42  * 2) if the alloc fails, set the page's hijacked flag so we'll use the
43  *    page pointer directly as a counter
44  *
45  * if we find our page, we increment the page's refcount so that it stays
46  * allocated while we're using it
47  */
48 static int bitmap_checkpage(struct bitmap_counts *bitmap,
49                             unsigned long page, int create)
50 __releases(bitmap->lock)
51 __acquires(bitmap->lock)
52 {
53         unsigned char *mappage;
54
55         if (page >= bitmap->pages) {
56                 /* This can happen if bitmap_start_sync goes beyond
57                  * End-of-device while looking for a whole page.
58                  * It is harmless.
59                  */
60                 return -EINVAL;
61         }
62
63         if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
64                 return 0;
65
66         if (bitmap->bp[page].map) /* page is already allocated, just return */
67                 return 0;
68
69         if (!create)
70                 return -ENOENT;
71
72         /* this page has not been allocated yet */
73
74         spin_unlock_irq(&bitmap->lock);
75         /* It is possible that this is being called inside a
76          * prepare_to_wait/finish_wait loop from raid5c:make_request().
77          * In general it is not permitted to sleep in that context as it
78          * can cause the loop to spin freely.
79          * That doesn't apply here as we can only reach this point
80          * once with any loop.
81          * When this function completes, either bp[page].map or
82          * bp[page].hijacked.  In either case, this function will
83          * abort before getting to this point again.  So there is
84          * no risk of a free-spin, and so it is safe to assert
85          * that sleeping here is allowed.
86          */
87         sched_annotate_sleep();
88         mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
89         spin_lock_irq(&bitmap->lock);
90
91         if (mappage == NULL) {
92                 pr_debug("md/bitmap: map page allocation failed, hijacking\n");
93                 /* failed - set the hijacked flag so that we can use the
94                  * pointer as a counter */
95                 if (!bitmap->bp[page].map)
96                         bitmap->bp[page].hijacked = 1;
97         } else if (bitmap->bp[page].map ||
98                    bitmap->bp[page].hijacked) {
99                 /* somebody beat us to getting the page */
100                 kfree(mappage);
101                 return 0;
102         } else {
103
104                 /* no page was in place and we have one, so install it */
105
106                 bitmap->bp[page].map = mappage;
107                 bitmap->missing_pages--;
108         }
109         return 0;
110 }
111
112 /* if page is completely empty, put it back on the free list, or dealloc it */
113 /* if page was hijacked, unmark the flag so it might get alloced next time */
114 /* Note: lock should be held when calling this */
115 static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
116 {
117         char *ptr;
118
119         if (bitmap->bp[page].count) /* page is still busy */
120                 return;
121
122         /* page is no longer in use, it can be released */
123
124         if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
125                 bitmap->bp[page].hijacked = 0;
126                 bitmap->bp[page].map = NULL;
127         } else {
128                 /* normal case, free the page */
129                 ptr = bitmap->bp[page].map;
130                 bitmap->bp[page].map = NULL;
131                 bitmap->missing_pages++;
132                 kfree(ptr);
133         }
134 }
135
136 /*
137  * bitmap file handling - read and write the bitmap file and its superblock
138  */
139
140 /*
141  * basic page I/O operations
142  */
143
144 /* IO operations when bitmap is stored near all superblocks */
145 static int read_sb_page(struct mddev *mddev, loff_t offset,
146                         struct page *page,
147                         unsigned long index, int size)
148 {
149         /* choose a good rdev and read the page from there */
150
151         struct md_rdev *rdev;
152         sector_t target;
153
154         rdev_for_each(rdev, mddev) {
155                 if (! test_bit(In_sync, &rdev->flags)
156                     || test_bit(Faulty, &rdev->flags))
157                         continue;
158
159                 target = offset + index * (PAGE_SIZE/512);
160
161                 if (sync_page_io(rdev, target,
162                                  roundup(size, bdev_logical_block_size(rdev->bdev)),
163                                  page, READ, true)) {
164                         page->index = index;
165                         return 0;
166                 }
167         }
168         return -EIO;
169 }
170
171 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
172 {
173         /* Iterate the disks of an mddev, using rcu to protect access to the
174          * linked list, and raising the refcount of devices we return to ensure
175          * they don't disappear while in use.
176          * As devices are only added or removed when raid_disk is < 0 and
177          * nr_pending is 0 and In_sync is clear, the entries we return will
178          * still be in the same position on the list when we re-enter
179          * list_for_each_entry_continue_rcu.
180          */
181         rcu_read_lock();
182         if (rdev == NULL)
183                 /* start at the beginning */
184                 rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set);
185         else {
186                 /* release the previous rdev and start from there. */
187                 rdev_dec_pending(rdev, mddev);
188         }
189         list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
190                 if (rdev->raid_disk >= 0 &&
191                     !test_bit(Faulty, &rdev->flags)) {
192                         /* this is a usable devices */
193                         atomic_inc(&rdev->nr_pending);
194                         rcu_read_unlock();
195                         return rdev;
196                 }
197         }
198         rcu_read_unlock();
199         return NULL;
200 }
201
202 static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
203 {
204         struct md_rdev *rdev = NULL;
205         struct block_device *bdev;
206         struct mddev *mddev = bitmap->mddev;
207         struct bitmap_storage *store = &bitmap->storage;
208         int node_offset = 0;
209
210         if (mddev_is_clustered(bitmap->mddev))
211                 node_offset = bitmap->cluster_slot * store->file_pages;
212
213         while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
214                 int size = PAGE_SIZE;
215                 loff_t offset = mddev->bitmap_info.offset;
216
217                 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
218
219                 if (page->index == store->file_pages-1) {
220                         int last_page_size = store->bytes & (PAGE_SIZE-1);
221                         if (last_page_size == 0)
222                                 last_page_size = PAGE_SIZE;
223                         size = roundup(last_page_size,
224                                        bdev_logical_block_size(bdev));
225                 }
226                 /* Just make sure we aren't corrupting data or
227                  * metadata
228                  */
229                 if (mddev->external) {
230                         /* Bitmap could be anywhere. */
231                         if (rdev->sb_start + offset + (page->index
232                                                        * (PAGE_SIZE/512))
233                             > rdev->data_offset
234                             &&
235                             rdev->sb_start + offset
236                             < (rdev->data_offset + mddev->dev_sectors
237                              + (PAGE_SIZE/512)))
238                                 goto bad_alignment;
239                 } else if (offset < 0) {
240                         /* DATA  BITMAP METADATA  */
241                         if (offset
242                             + (long)(page->index * (PAGE_SIZE/512))
243                             + size/512 > 0)
244                                 /* bitmap runs in to metadata */
245                                 goto bad_alignment;
246                         if (rdev->data_offset + mddev->dev_sectors
247                             > rdev->sb_start + offset)
248                                 /* data runs in to bitmap */
249                                 goto bad_alignment;
250                 } else if (rdev->sb_start < rdev->data_offset) {
251                         /* METADATA BITMAP DATA */
252                         if (rdev->sb_start
253                             + offset
254                             + page->index*(PAGE_SIZE/512) + size/512
255                             > rdev->data_offset)
256                                 /* bitmap runs in to data */
257                                 goto bad_alignment;
258                 } else {
259                         /* DATA METADATA BITMAP - no problems */
260                 }
261                 md_super_write(mddev, rdev,
262                                rdev->sb_start + offset
263                                + page->index * (PAGE_SIZE/512),
264                                size,
265                                page);
266         }
267
268         if (wait)
269                 md_super_wait(mddev);
270         return 0;
271
272  bad_alignment:
273         return -EINVAL;
274 }
275
276 static void bitmap_file_kick(struct bitmap *bitmap);
277 /*
278  * write out a page to a file
279  */
280 static void write_page(struct bitmap *bitmap, struct page *page, int wait)
281 {
282         struct buffer_head *bh;
283
284         if (bitmap->storage.file == NULL) {
285                 switch (write_sb_page(bitmap, page, wait)) {
286                 case -EINVAL:
287                         set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
288                 }
289         } else {
290
291                 bh = page_buffers(page);
292
293                 while (bh && bh->b_blocknr) {
294                         atomic_inc(&bitmap->pending_writes);
295                         set_buffer_locked(bh);
296                         set_buffer_mapped(bh);
297                         submit_bh(WRITE | REQ_SYNC, bh);
298                         bh = bh->b_this_page;
299                 }
300
301                 if (wait)
302                         wait_event(bitmap->write_wait,
303                                    atomic_read(&bitmap->pending_writes)==0);
304         }
305         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
306                 bitmap_file_kick(bitmap);
307 }
308
309 static void end_bitmap_write(struct buffer_head *bh, int uptodate)
310 {
311         struct bitmap *bitmap = bh->b_private;
312
313         if (!uptodate)
314                 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
315         if (atomic_dec_and_test(&bitmap->pending_writes))
316                 wake_up(&bitmap->write_wait);
317 }
318
/*
 * Detach the private (buffer) data from @page and release one page
 * reference.  Copied from buffer.c.
 */
static void __clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
}
327 static void free_buffers(struct page *page)
328 {
329         struct buffer_head *bh;
330
331         if (!PagePrivate(page))
332                 return;
333
334         bh = page_buffers(page);
335         while (bh) {
336                 struct buffer_head *next = bh->b_this_page;
337                 free_buffer_head(bh);
338                 bh = next;
339         }
340         __clear_page_buffers(page);
341         put_page(page);
342 }
343
344 /* read a page from a file.
345  * We both read the page, and attach buffers to the page to record the
346  * address of each block (using bmap).  These addresses will be used
347  * to write the block later, completely bypassing the filesystem.
348  * This usage is similar to how swap files are handled, and allows us
349  * to write to a file with no concerns of memory allocation failing.
350  */
351 static int read_page(struct file *file, unsigned long index,
352                      struct bitmap *bitmap,
353                      unsigned long count,
354                      struct page *page)
355 {
356         int ret = 0;
357         struct inode *inode = file_inode(file);
358         struct buffer_head *bh;
359         sector_t block;
360
361         pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
362                  (unsigned long long)index << PAGE_SHIFT);
363
364         bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
365         if (!bh) {
366                 ret = -ENOMEM;
367                 goto out;
368         }
369         attach_page_buffers(page, bh);
370         block = index << (PAGE_SHIFT - inode->i_blkbits);
371         while (bh) {
372                 if (count == 0)
373                         bh->b_blocknr = 0;
374                 else {
375                         bh->b_blocknr = bmap(inode, block);
376                         if (bh->b_blocknr == 0) {
377                                 /* Cannot use this file! */
378                                 ret = -EINVAL;
379                                 goto out;
380                         }
381                         bh->b_bdev = inode->i_sb->s_bdev;
382                         if (count < (1<<inode->i_blkbits))
383                                 count = 0;
384                         else
385                                 count -= (1<<inode->i_blkbits);
386
387                         bh->b_end_io = end_bitmap_write;
388                         bh->b_private = bitmap;
389                         atomic_inc(&bitmap->pending_writes);
390                         set_buffer_locked(bh);
391                         set_buffer_mapped(bh);
392                         submit_bh(READ, bh);
393                 }
394                 block++;
395                 bh = bh->b_this_page;
396         }
397         page->index = index;
398
399         wait_event(bitmap->write_wait,
400                    atomic_read(&bitmap->pending_writes)==0);
401         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
402                 ret = -EIO;
403 out:
404         if (ret)
405                 printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
406                         (int)PAGE_SIZE,
407                         (unsigned long long)index << PAGE_SHIFT,
408                         ret);
409         return ret;
410 }
411
412 /*
413  * bitmap file superblock operations
414  */
415
416 /* update the event counter and sync the superblock to disk */
417 void bitmap_update_sb(struct bitmap *bitmap)
418 {
419         bitmap_super_t *sb;
420
421         if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
422                 return;
423         if (bitmap->mddev->bitmap_info.external)
424                 return;
425         if (!bitmap->storage.sb_page) /* no superblock */
426                 return;
427         sb = kmap_atomic(bitmap->storage.sb_page);
428         sb->events = cpu_to_le64(bitmap->mddev->events);
429         if (bitmap->mddev->events < bitmap->events_cleared)
430                 /* rocking back to read-only */
431                 bitmap->events_cleared = bitmap->mddev->events;
432         sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
433         sb->state = cpu_to_le32(bitmap->flags);
434         /* Just in case these have been changed via sysfs: */
435         sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
436         sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
437         /* This might have been changed by a reshape */
438         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
439         sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
440         sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
441         sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
442                                            bitmap_info.space);
443         kunmap_atomic(sb);
444         write_page(bitmap, bitmap->storage.sb_page, 1);
445 }
446
447 /* print out the bitmap file superblock */
448 void bitmap_print_sb(struct bitmap *bitmap)
449 {
450         bitmap_super_t *sb;
451
452         if (!bitmap || !bitmap->storage.sb_page)
453                 return;
454         sb = kmap_atomic(bitmap->storage.sb_page);
455         printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
456         printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
457         printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
458         printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
459                                         *(__u32 *)(sb->uuid+0),
460                                         *(__u32 *)(sb->uuid+4),
461                                         *(__u32 *)(sb->uuid+8),
462                                         *(__u32 *)(sb->uuid+12));
463         printk(KERN_DEBUG "        events: %llu\n",
464                         (unsigned long long) le64_to_cpu(sb->events));
465         printk(KERN_DEBUG "events cleared: %llu\n",
466                         (unsigned long long) le64_to_cpu(sb->events_cleared));
467         printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
468         printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
469         printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
470         printk(KERN_DEBUG "     sync size: %llu KB\n",
471                         (unsigned long long)le64_to_cpu(sb->sync_size)/2);
472         printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
473         kunmap_atomic(sb);
474 }
475
476 /*
477  * bitmap_new_disk_sb
478  * @bitmap
479  *
480  * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
481  * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
482  * This function verifies 'bitmap_info' and populates the on-disk bitmap
483  * structure, which is to be written to disk.
484  *
485  * Returns: 0 on success, -Exxx on error
486  */
487 static int bitmap_new_disk_sb(struct bitmap *bitmap)
488 {
489         bitmap_super_t *sb;
490         unsigned long chunksize, daemon_sleep, write_behind;
491
492         bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
493         if (bitmap->storage.sb_page == NULL)
494                 return -ENOMEM;
495         bitmap->storage.sb_page->index = 0;
496
497         sb = kmap_atomic(bitmap->storage.sb_page);
498
499         sb->magic = cpu_to_le32(BITMAP_MAGIC);
500         sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
501
502         chunksize = bitmap->mddev->bitmap_info.chunksize;
503         BUG_ON(!chunksize);
504         if (!is_power_of_2(chunksize)) {
505                 kunmap_atomic(sb);
506                 printk(KERN_ERR "bitmap chunksize not a power of 2\n");
507                 return -EINVAL;
508         }
509         sb->chunksize = cpu_to_le32(chunksize);
510
511         daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
512         if (!daemon_sleep ||
513             (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
514                 printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
515                 daemon_sleep = 5 * HZ;
516         }
517         sb->daemon_sleep = cpu_to_le32(daemon_sleep);
518         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
519
520         /*
521          * FIXME: write_behind for RAID1.  If not specified, what
522          * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
523          */
524         write_behind = bitmap->mddev->bitmap_info.max_write_behind;
525         if (write_behind > COUNTER_MAX)
526                 write_behind = COUNTER_MAX / 2;
527         sb->write_behind = cpu_to_le32(write_behind);
528         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
529
530         /* keep the array size field of the bitmap superblock up to date */
531         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
532
533         memcpy(sb->uuid, bitmap->mddev->uuid, 16);
534
535         set_bit(BITMAP_STALE, &bitmap->flags);
536         sb->state = cpu_to_le32(bitmap->flags);
537         bitmap->events_cleared = bitmap->mddev->events;
538         sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
539
540         kunmap_atomic(sb);
541
542         return 0;
543 }
544
545 /* read the superblock from the bitmap file and initialize some bitmap fields */
546 static int bitmap_read_sb(struct bitmap *bitmap)
547 {
548         char *reason = NULL;
549         bitmap_super_t *sb;
550         unsigned long chunksize, daemon_sleep, write_behind;
551         unsigned long long events;
552         int nodes = 0;
553         unsigned long sectors_reserved = 0;
554         int err = -EINVAL;
555         struct page *sb_page;
556         int cluster_setup_done = 0;
557
558         if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
559                 chunksize = 128 * 1024 * 1024;
560                 daemon_sleep = 5 * HZ;
561                 write_behind = 0;
562                 set_bit(BITMAP_STALE, &bitmap->flags);
563                 err = 0;
564                 goto out_no_sb;
565         }
566         /* page 0 is the superblock, read it... */
567         sb_page = alloc_page(GFP_KERNEL);
568         if (!sb_page)
569                 return -ENOMEM;
570         bitmap->storage.sb_page = sb_page;
571
572 re_read:
573         if (bitmap->storage.file) {
574                 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
575                 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
576
577                 err = read_page(bitmap->storage.file, 0,
578                                 bitmap, bytes, sb_page);
579         } else {
580                 err = read_sb_page(bitmap->mddev,
581                                    bitmap->mddev->bitmap_info.offset,
582                                    sb_page,
583                                    0, sizeof(bitmap_super_t));
584         }
585         if (err)
586                 return err;
587
588         err = -EINVAL;
589         sb = kmap_atomic(sb_page);
590
591         chunksize = le32_to_cpu(sb->chunksize);
592         daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
593         write_behind = le32_to_cpu(sb->write_behind);
594         sectors_reserved = le32_to_cpu(sb->sectors_reserved);
595         nodes = le32_to_cpu(sb->nodes);
596         strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
597
598         /* verify that the bitmap-specific fields are valid */
599         if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
600                 reason = "bad magic";
601         else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
602                  le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
603                 reason = "unrecognized superblock version";
604         else if (chunksize < 512)
605                 reason = "bitmap chunksize too small";
606         else if (!is_power_of_2(chunksize))
607                 reason = "bitmap chunksize not a power of 2";
608         else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
609                 reason = "daemon sleep period out of range";
610         else if (write_behind > COUNTER_MAX)
611                 reason = "write-behind limit out of range (0 - 16383)";
612         if (reason) {
613                 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
614                         bmname(bitmap), reason);
615                 goto out;
616         }
617
618         /* keep the array size field of the bitmap superblock up to date */
619         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
620
621         if (bitmap->mddev->persistent) {
622                 /*
623                  * We have a persistent array superblock, so compare the
624                  * bitmap's UUID and event counter to the mddev's
625                  */
626                 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
627                         printk(KERN_INFO
628                                "%s: bitmap superblock UUID mismatch\n",
629                                bmname(bitmap));
630                         goto out;
631                 }
632                 events = le64_to_cpu(sb->events);
633                 if (!nodes && (events < bitmap->mddev->events)) {
634                         printk(KERN_INFO
635                                "%s: bitmap file is out of date (%llu < %llu) "
636                                "-- forcing full recovery\n",
637                                bmname(bitmap), events,
638                                (unsigned long long) bitmap->mddev->events);
639                         set_bit(BITMAP_STALE, &bitmap->flags);
640                 }
641         }
642
643         /* assign fields using values from superblock */
644         bitmap->flags |= le32_to_cpu(sb->state);
645         if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
646                 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
647         bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
648         strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
649         err = 0;
650
651 out:
652         kunmap_atomic(sb);
653         if (nodes && !cluster_setup_done) {
654                 sector_t bm_blocks;
655
656                 bm_blocks = sector_div(bitmap->mddev->resync_max_sectors, (chunksize >> 9));
657                 bm_blocks = bm_blocks << 3;
658                 /* We have bitmap supers at 4k boundaries, hence this
659                  * is hardcoded */
660                 bm_blocks = DIV_ROUND_UP(bm_blocks, 4096);
661                 err = md_setup_cluster(bitmap->mddev, nodes);
662                 if (err) {
663                         pr_err("%s: Could not setup cluster service (%d)\n",
664                                         bmname(bitmap), err);
665                         goto out_no_sb;
666                 }
667                 bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
668                 bitmap->mddev->bitmap_info.offset +=
669                         bitmap->cluster_slot * (bm_blocks << 3);
670                 pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
671                         bitmap->cluster_slot,
672                         (unsigned long long)bitmap->mddev->bitmap_info.offset);
673                 cluster_setup_done = 1;
674                 goto re_read;
675         }
676
677
678 out_no_sb:
679         if (test_bit(BITMAP_STALE, &bitmap->flags))
680                 bitmap->events_cleared = bitmap->mddev->events;
681         bitmap->mddev->bitmap_info.chunksize = chunksize;
682         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
683         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
684         bitmap->mddev->bitmap_info.nodes = nodes;
685         if (bitmap->mddev->bitmap_info.space == 0 ||
686             bitmap->mddev->bitmap_info.space > sectors_reserved)
687                 bitmap->mddev->bitmap_info.space = sectors_reserved;
688         if (err) {
689                 bitmap_print_sb(bitmap);
690                 if (cluster_setup_done)
691                         md_cluster_stop(bitmap->mddev);
692         }
693         return err;
694 }
695
696 /*
697  * general bitmap file operations
698  */
699
700 /*
701  * on-disk bitmap:
702  *
703  * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
704  * file a page at a time. There's a superblock at the start of the file.
705  */
706 /* calculate the index of the page that contains this bit */
707 static inline unsigned long file_page_index(struct bitmap_storage *store,
708                                             unsigned long chunk)
709 {
710         if (store->sb_page)
711                 chunk += sizeof(bitmap_super_t) << 3;
712         return chunk >> PAGE_BIT_SHIFT;
713 }
714
715 /* calculate the (bit) offset of this bit within a page */
716 static inline unsigned long file_page_offset(struct bitmap_storage *store,
717                                              unsigned long chunk)
718 {
719         if (store->sb_page)
720                 chunk += sizeof(bitmap_super_t) << 3;
721         return chunk & (PAGE_BITS - 1);
722 }
723
724 /*
725  * return a pointer to the page in the filemap that contains the given bit
726  *
727  */
728 static inline struct page *filemap_get_page(struct bitmap_storage *store,
729                                             unsigned long chunk)
730 {
731         if (file_page_index(store, chunk) >= store->file_pages)
732                 return NULL;
733         return store->filemap[file_page_index(store, chunk)];
734 }
735
736 static int bitmap_storage_alloc(struct bitmap_storage *store,
737                                 unsigned long chunks, int with_super,
738                                 int slot_number)
739 {
740         int pnum, offset = 0;
741         unsigned long num_pages;
742         unsigned long bytes;
743
744         bytes = DIV_ROUND_UP(chunks, 8);
745         if (with_super)
746                 bytes += sizeof(bitmap_super_t);
747
748         num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
749         offset = slot_number * (num_pages - 1);
750
751         store->filemap = kmalloc(sizeof(struct page *)
752                                  * num_pages, GFP_KERNEL);
753         if (!store->filemap)
754                 return -ENOMEM;
755
756         if (with_super && !store->sb_page) {
757                 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
758                 if (store->sb_page == NULL)
759                         return -ENOMEM;
760         }
761
762         pnum = 0;
763         if (store->sb_page) {
764                 store->filemap[0] = store->sb_page;
765                 pnum = 1;
766                 store->sb_page->index = offset;
767         }
768
769         for ( ; pnum < num_pages; pnum++) {
770                 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
771                 if (!store->filemap[pnum]) {
772                         store->file_pages = pnum;
773                         return -ENOMEM;
774                 }
775                 store->filemap[pnum]->index = pnum + offset;
776         }
777         store->file_pages = pnum;
778
779         /* We need 4 bits per page, rounded up to a multiple
780          * of sizeof(unsigned long) */
781         store->filemap_attr = kzalloc(
782                 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
783                 GFP_KERNEL);
784         if (!store->filemap_attr)
785                 return -ENOMEM;
786
787         store->bytes = bytes;
788
789         return 0;
790 }
791
792 static void bitmap_file_unmap(struct bitmap_storage *store)
793 {
794         struct page **map, *sb_page;
795         int pages;
796         struct file *file;
797
798         file = store->file;
799         map = store->filemap;
800         pages = store->file_pages;
801         sb_page = store->sb_page;
802
803         while (pages--)
804                 if (map[pages] != sb_page) /* 0 is sb_page, release it below */
805                         free_buffers(map[pages]);
806         kfree(map);
807         kfree(store->filemap_attr);
808
809         if (sb_page)
810                 free_buffers(sb_page);
811
812         if (file) {
813                 struct inode *inode = file_inode(file);
814                 invalidate_mapping_pages(inode->i_mapping, 0, -1);
815                 fput(file);
816         }
817 }
818
819 /*
820  * bitmap_file_kick - if an error occurs while manipulating the bitmap file
821  * then it is no longer reliable, so we stop using it and we mark the file
822  * as failed in the superblock
823  */
824 static void bitmap_file_kick(struct bitmap *bitmap)
825 {
826         char *path, *ptr = NULL;
827
828         if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
829                 bitmap_update_sb(bitmap);
830
831                 if (bitmap->storage.file) {
832                         path = kmalloc(PAGE_SIZE, GFP_KERNEL);
833                         if (path)
834                                 ptr = d_path(&bitmap->storage.file->f_path,
835                                              path, PAGE_SIZE);
836
837                         printk(KERN_ALERT
838                               "%s: kicking failed bitmap file %s from array!\n",
839                               bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
840
841                         kfree(path);
842                 } else
843                         printk(KERN_ALERT
844                                "%s: disabling internal bitmap due to errors\n",
845                                bmname(bitmap));
846         }
847 }
848
/* Per-page state flags kept in the store's filemap_attr bit array. */
enum bitmap_page_attr {
	/* there are set bits that need to be synced */
	BITMAP_PAGE_DIRTY = 0,
	/* there are bits that are being cleaned, i.e. counter is 1 or 2 */
	BITMAP_PAGE_PENDING = 1,
	/* there are cleared bits that need to be synced */
	BITMAP_PAGE_NEEDWRITE = 2,
};
855
856 static inline void set_page_attr(struct bitmap *bitmap, int pnum,
857                                  enum bitmap_page_attr attr)
858 {
859         set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
860 }
861
862 static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
863                                    enum bitmap_page_attr attr)
864 {
865         clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
866 }
867
868 static inline int test_page_attr(struct bitmap *bitmap, int pnum,
869                                  enum bitmap_page_attr attr)
870 {
871         return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
872 }
873
874 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
875                                            enum bitmap_page_attr attr)
876 {
877         return test_and_clear_bit((pnum<<2) + attr,
878                                   bitmap->storage.filemap_attr);
879 }
880 /*
881  * bitmap_file_set_bit -- called before performing a write to the md device
882  * to set (and eventually sync) a particular bit in the bitmap file
883  *
884  * we set the bit immediately, then we record the page number so that
885  * when an unplug occurs, we can flush the dirty pages out to disk
886  */
887 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
888 {
889         unsigned long bit;
890         struct page *page;
891         void *kaddr;
892         unsigned long chunk = block >> bitmap->counts.chunkshift;
893
894         page = filemap_get_page(&bitmap->storage, chunk);
895         if (!page)
896                 return;
897         bit = file_page_offset(&bitmap->storage, chunk);
898
899         /* set the bit */
900         kaddr = kmap_atomic(page);
901         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
902                 set_bit(bit, kaddr);
903         else
904                 set_bit_le(bit, kaddr);
905         kunmap_atomic(kaddr);
906         pr_debug("set file bit %lu page %lu\n", bit, page->index);
907         /* record page number so it gets flushed to disk when unplug occurs */
908         set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
909 }
910
911 static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
912 {
913         unsigned long bit;
914         struct page *page;
915         void *paddr;
916         unsigned long chunk = block >> bitmap->counts.chunkshift;
917
918         page = filemap_get_page(&bitmap->storage, chunk);
919         if (!page)
920                 return;
921         bit = file_page_offset(&bitmap->storage, chunk);
922         paddr = kmap_atomic(page);
923         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
924                 clear_bit(bit, paddr);
925         else
926                 clear_bit_le(bit, paddr);
927         kunmap_atomic(paddr);
928         if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
929                 set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
930                 bitmap->allclean = 0;
931         }
932 }
933
934 /* this gets called when the md device is ready to unplug its underlying
935  * (slave) device queues -- before we let any writes go down, we need to
936  * sync the dirty pages of the bitmap file to disk */
937 void bitmap_unplug(struct bitmap *bitmap)
938 {
939         unsigned long i;
940         int dirty, need_write;
941
942         if (!bitmap || !bitmap->storage.filemap ||
943             test_bit(BITMAP_STALE, &bitmap->flags))
944                 return;
945
946         /* look at each page to see if there are any set bits that need to be
947          * flushed out to disk */
948         for (i = 0; i < bitmap->storage.file_pages; i++) {
949                 if (!bitmap->storage.filemap)
950                         return;
951                 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
952                 need_write = test_and_clear_page_attr(bitmap, i,
953                                                       BITMAP_PAGE_NEEDWRITE);
954                 if (dirty || need_write) {
955                         clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
956                         write_page(bitmap, bitmap->storage.filemap[i], 0);
957                 }
958         }
959         if (bitmap->storage.file)
960                 wait_event(bitmap->write_wait,
961                            atomic_read(&bitmap->pending_writes)==0);
962         else
963                 md_super_wait(bitmap->mddev);
964
965         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
966                 bitmap_file_kick(bitmap);
967 }
968 EXPORT_SYMBOL(bitmap_unplug);
969
970 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
971 /* * bitmap_init_from_disk -- called at bitmap_create time to initialize
972  * the in-memory bitmap from the on-disk bitmap -- also, sets up the
973  * memory mapping of the bitmap file
974  * Special cases:
975  *   if there's no bitmap file, or if the bitmap file had been
976  *   previously kicked from the array, we mark all the bits as
977  *   1's in order to cause a full resync.
978  *
979  * We ignore all bits for sectors that end earlier than 'start'.
980  * This is used when reading an out-of-date bitmap...
981  */
982 static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
983 {
984         unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
985         struct page *page = NULL;
986         unsigned long bit_cnt = 0;
987         struct file *file;
988         unsigned long offset;
989         int outofdate;
990         int ret = -ENOSPC;
991         void *paddr;
992         struct bitmap_storage *store = &bitmap->storage;
993
994         chunks = bitmap->counts.chunks;
995         file = store->file;
996
997         if (!file && !bitmap->mddev->bitmap_info.offset) {
998                 /* No permanent bitmap - fill with '1s'. */
999                 store->filemap = NULL;
1000                 store->file_pages = 0;
1001                 for (i = 0; i < chunks ; i++) {
1002                         /* if the disk bit is set, set the memory bit */
1003                         int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
1004                                       >= start);
1005                         bitmap_set_memory_bits(bitmap,
1006                                                (sector_t)i << bitmap->counts.chunkshift,
1007                                                needed);
1008                 }
1009                 return 0;
1010         }
1011
1012         outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
1013         if (outofdate)
1014                 printk(KERN_INFO "%s: bitmap file is out of date, doing full "
1015                         "recovery\n", bmname(bitmap));
1016
1017         if (file && i_size_read(file->f_mapping->host) < store->bytes) {
1018                 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
1019                        bmname(bitmap),
1020                        (unsigned long) i_size_read(file->f_mapping->host),
1021                        store->bytes);
1022                 goto err;
1023         }
1024
1025         oldindex = ~0L;
1026         offset = 0;
1027         if (!bitmap->mddev->bitmap_info.external)
1028                 offset = sizeof(bitmap_super_t);
1029
1030         if (mddev_is_clustered(bitmap->mddev))
1031                 node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
1032
1033         for (i = 0; i < chunks; i++) {
1034                 int b;
1035                 index = file_page_index(&bitmap->storage, i);
1036                 bit = file_page_offset(&bitmap->storage, i);
1037                 if (index != oldindex) { /* this is a new page, read it in */
1038                         int count;
1039                         /* unmap the old page, we're done with it */
1040                         if (index == store->file_pages-1)
1041                                 count = store->bytes - index * PAGE_SIZE;
1042                         else
1043                                 count = PAGE_SIZE;
1044                         page = store->filemap[index];
1045                         if (file)
1046                                 ret = read_page(file, index, bitmap,
1047                                                 count, page);
1048                         else
1049                                 ret = read_sb_page(
1050                                         bitmap->mddev,
1051                                         bitmap->mddev->bitmap_info.offset,
1052                                         page,
1053                                         index + node_offset, count);
1054
1055                         if (ret)
1056                                 goto err;
1057
1058                         oldindex = index;
1059
1060                         if (outofdate) {
1061                                 /*
1062                                  * if bitmap is out of date, dirty the
1063                                  * whole page and write it out
1064                                  */
1065                                 paddr = kmap_atomic(page);
1066                                 memset(paddr + offset, 0xff,
1067                                        PAGE_SIZE - offset);
1068                                 kunmap_atomic(paddr);
1069                                 write_page(bitmap, page, 1);
1070
1071                                 ret = -EIO;
1072                                 if (test_bit(BITMAP_WRITE_ERROR,
1073                                              &bitmap->flags))
1074                                         goto err;
1075                         }
1076                 }
1077                 paddr = kmap_atomic(page);
1078                 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1079                         b = test_bit(bit, paddr);
1080                 else
1081                         b = test_bit_le(bit, paddr);
1082                 kunmap_atomic(paddr);
1083                 if (b) {
1084                         /* if the disk bit is set, set the memory bit */
1085                         int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
1086                                       >= start);
1087                         bitmap_set_memory_bits(bitmap,
1088                                                (sector_t)i << bitmap->counts.chunkshift,
1089                                                needed);
1090                         bit_cnt++;
1091                 }
1092                 offset = 0;
1093         }
1094
1095         printk(KERN_INFO "%s: bitmap initialized from disk: "
1096                "read %lu pages, set %lu of %lu bits\n",
1097                bmname(bitmap), store->file_pages,
1098                bit_cnt, chunks);
1099
1100         return 0;
1101
1102  err:
1103         printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
1104                bmname(bitmap), ret);
1105         return ret;
1106 }
1107
1108 void bitmap_write_all(struct bitmap *bitmap)
1109 {
1110         /* We don't actually write all bitmap blocks here,
1111          * just flag them as needing to be written
1112          */
1113         int i;
1114
1115         if (!bitmap || !bitmap->storage.filemap)
1116                 return;
1117         if (bitmap->storage.file)
1118                 /* Only one copy, so nothing needed */
1119                 return;
1120
1121         for (i = 0; i < bitmap->storage.file_pages; i++)
1122                 set_page_attr(bitmap, i,
1123                               BITMAP_PAGE_NEEDWRITE);
1124         bitmap->allclean = 0;
1125 }
1126
1127 static void bitmap_count_page(struct bitmap_counts *bitmap,
1128                               sector_t offset, int inc)
1129 {
1130         sector_t chunk = offset >> bitmap->chunkshift;
1131         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1132         bitmap->bp[page].count += inc;
1133         bitmap_checkfree(bitmap, page);
1134 }
1135
1136 static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
1137 {
1138         sector_t chunk = offset >> bitmap->chunkshift;
1139         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1140         struct bitmap_page *bp = &bitmap->bp[page];
1141
1142         if (!bp->pending)
1143                 bp->pending = 1;
1144 }
1145
1146 static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1147                                             sector_t offset, sector_t *blocks,
1148                                             int create);
1149
1150 /*
1151  * bitmap daemon -- periodically wakes up to clean bits and flush pages
1152  *                      out to disk
1153  */
1154
1155 void bitmap_daemon_work(struct mddev *mddev)
1156 {
1157         struct bitmap *bitmap;
1158         unsigned long j;
1159         unsigned long nextpage;
1160         sector_t blocks;
1161         struct bitmap_counts *counts;
1162
1163         /* Use a mutex to guard daemon_work against
1164          * bitmap_destroy.
1165          */
1166         mutex_lock(&mddev->bitmap_info.mutex);
1167         bitmap = mddev->bitmap;
1168         if (bitmap == NULL) {
1169                 mutex_unlock(&mddev->bitmap_info.mutex);
1170                 return;
1171         }
1172         if (time_before(jiffies, bitmap->daemon_lastrun
1173                         + mddev->bitmap_info.daemon_sleep))
1174                 goto done;
1175
1176         bitmap->daemon_lastrun = jiffies;
1177         if (bitmap->allclean) {
1178                 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1179                 goto done;
1180         }
1181         bitmap->allclean = 1;
1182
1183         /* Any file-page which is PENDING now needs to be written.
1184          * So set NEEDWRITE now, then after we make any last-minute changes
1185          * we will write it.
1186          */
1187         for (j = 0; j < bitmap->storage.file_pages; j++)
1188                 if (test_and_clear_page_attr(bitmap, j,
1189                                              BITMAP_PAGE_PENDING))
1190                         set_page_attr(bitmap, j,
1191                                       BITMAP_PAGE_NEEDWRITE);
1192
1193         if (bitmap->need_sync &&
1194             mddev->bitmap_info.external == 0) {
1195                 /* Arrange for superblock update as well as
1196                  * other changes */
1197                 bitmap_super_t *sb;
1198                 bitmap->need_sync = 0;
1199                 if (bitmap->storage.filemap) {
1200                         sb = kmap_atomic(bitmap->storage.sb_page);
1201                         sb->events_cleared =
1202                                 cpu_to_le64(bitmap->events_cleared);
1203                         kunmap_atomic(sb);
1204                         set_page_attr(bitmap, 0,
1205                                       BITMAP_PAGE_NEEDWRITE);
1206                 }
1207         }
1208         /* Now look at the bitmap counters and if any are '2' or '1',
1209          * decrement and handle accordingly.
1210          */
1211         counts = &bitmap->counts;
1212         spin_lock_irq(&counts->lock);
1213         nextpage = 0;
1214         for (j = 0; j < counts->chunks; j++) {
1215                 bitmap_counter_t *bmc;
1216                 sector_t  block = (sector_t)j << counts->chunkshift;
1217
1218                 if (j == nextpage) {
1219                         nextpage += PAGE_COUNTER_RATIO;
1220                         if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
1221                                 j |= PAGE_COUNTER_MASK;
1222                                 continue;
1223                         }
1224                         counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
1225                 }
1226                 bmc = bitmap_get_counter(counts,
1227                                          block,
1228                                          &blocks, 0);
1229
1230                 if (!bmc) {
1231                         j |= PAGE_COUNTER_MASK;
1232                         continue;
1233                 }
1234                 if (*bmc == 1 && !bitmap->need_sync) {
1235                         /* We can clear the bit */
1236                         *bmc = 0;
1237                         bitmap_count_page(counts, block, -1);
1238                         bitmap_file_clear_bit(bitmap, block);
1239                 } else if (*bmc && *bmc <= 2) {
1240                         *bmc = 1;
1241                         bitmap_set_pending(counts, block);
1242                         bitmap->allclean = 0;
1243                 }
1244         }
1245         spin_unlock_irq(&counts->lock);
1246
1247         /* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
1248          * DIRTY pages need to be written by bitmap_unplug so it can wait
1249          * for them.
1250          * If we find any DIRTY page we stop there and let bitmap_unplug
1251          * handle all the rest.  This is important in the case where
1252          * the first blocking holds the superblock and it has been updated.
1253          * We mustn't write any other blocks before the superblock.
1254          */
1255         for (j = 0;
1256              j < bitmap->storage.file_pages
1257                      && !test_bit(BITMAP_STALE, &bitmap->flags);
1258              j++) {
1259                 if (test_page_attr(bitmap, j,
1260                                    BITMAP_PAGE_DIRTY))
1261                         /* bitmap_unplug will handle the rest */
1262                         break;
1263                 if (test_and_clear_page_attr(bitmap, j,
1264                                              BITMAP_PAGE_NEEDWRITE)) {
1265                         write_page(bitmap, bitmap->storage.filemap[j], 0);
1266                 }
1267         }
1268
1269  done:
1270         if (bitmap->allclean == 0)
1271                 mddev->thread->timeout =
1272                         mddev->bitmap_info.daemon_sleep;
1273         mutex_unlock(&mddev->bitmap_info.mutex);
1274 }
1275
1276 static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1277                                             sector_t offset, sector_t *blocks,
1278                                             int create)
1279 __releases(bitmap->lock)
1280 __acquires(bitmap->lock)
1281 {
1282         /* If 'create', we might release the lock and reclaim it.
1283          * The lock must have been taken with interrupts enabled.
1284          * If !create, we don't release the lock.
1285          */
1286         sector_t chunk = offset >> bitmap->chunkshift;
1287         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1288         unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1289         sector_t csize;
1290         int err;
1291
1292         err = bitmap_checkpage(bitmap, page, create);
1293
1294         if (bitmap->bp[page].hijacked ||
1295             bitmap->bp[page].map == NULL)
1296                 csize = ((sector_t)1) << (bitmap->chunkshift +
1297                                           PAGE_COUNTER_SHIFT - 1);
1298         else
1299                 csize = ((sector_t)1) << bitmap->chunkshift;
1300         *blocks = csize - (offset & (csize - 1));
1301
1302         if (err < 0)
1303                 return NULL;
1304
1305         /* now locked ... */
1306
1307         if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1308                 /* should we use the first or second counter field
1309                  * of the hijacked pointer? */
1310                 int hi = (pageoff > PAGE_COUNTER_MASK);
1311                 return  &((bitmap_counter_t *)
1312                           &bitmap->bp[page].map)[hi];
1313         } else /* page is allocated */
1314                 return (bitmap_counter_t *)
1315                         &(bitmap->bp[page].map[pageoff]);
1316 }
1317
1318 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
1319 {
1320         if (!bitmap)
1321                 return 0;
1322
1323         if (behind) {
1324                 int bw;
1325                 atomic_inc(&bitmap->behind_writes);
1326                 bw = atomic_read(&bitmap->behind_writes);
1327                 if (bw > bitmap->behind_writes_used)
1328                         bitmap->behind_writes_used = bw;
1329
1330                 pr_debug("inc write-behind count %d/%lu\n",
1331                          bw, bitmap->mddev->bitmap_info.max_write_behind);
1332         }
1333
1334         while (sectors) {
1335                 sector_t blocks;
1336                 bitmap_counter_t *bmc;
1337
1338                 spin_lock_irq(&bitmap->counts.lock);
1339                 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
1340                 if (!bmc) {
1341                         spin_unlock_irq(&bitmap->counts.lock);
1342                         return 0;
1343                 }
1344
1345                 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
1346                         DEFINE_WAIT(__wait);
1347                         /* note that it is safe to do the prepare_to_wait
1348                          * after the test as long as we do it before dropping
1349                          * the spinlock.
1350                          */
1351                         prepare_to_wait(&bitmap->overflow_wait, &__wait,
1352                                         TASK_UNINTERRUPTIBLE);
1353                         spin_unlock_irq(&bitmap->counts.lock);
1354                         schedule();
1355                         finish_wait(&bitmap->overflow_wait, &__wait);
1356                         continue;
1357                 }
1358
1359                 switch (*bmc) {
1360                 case 0:
1361                         bitmap_file_set_bit(bitmap, offset);
1362                         bitmap_count_page(&bitmap->counts, offset, 1);
1363                         /* fall through */
1364                 case 1:
1365                         *bmc = 2;
1366                 }
1367
1368                 (*bmc)++;
1369
1370                 spin_unlock_irq(&bitmap->counts.lock);
1371
1372                 offset += blocks;
1373                 if (sectors > blocks)
1374                         sectors -= blocks;
1375                 else
1376                         sectors = 0;
1377         }
1378         return 0;
1379 }
1380 EXPORT_SYMBOL(bitmap_startwrite);
1381
1382 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1383                      int success, int behind)
1384 {
1385         if (!bitmap)
1386                 return;
1387         if (behind) {
1388                 if (atomic_dec_and_test(&bitmap->behind_writes))
1389                         wake_up(&bitmap->behind_wait);
1390                 pr_debug("dec write-behind count %d/%lu\n",
1391                          atomic_read(&bitmap->behind_writes),
1392                          bitmap->mddev->bitmap_info.max_write_behind);
1393         }
1394
1395         while (sectors) {
1396                 sector_t blocks;
1397                 unsigned long flags;
1398                 bitmap_counter_t *bmc;
1399
1400                 spin_lock_irqsave(&bitmap->counts.lock, flags);
1401                 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
1402                 if (!bmc) {
1403                         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1404                         return;
1405                 }
1406
1407                 if (success && !bitmap->mddev->degraded &&
1408                     bitmap->events_cleared < bitmap->mddev->events) {
1409                         bitmap->events_cleared = bitmap->mddev->events;
1410                         bitmap->need_sync = 1;
1411                         sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
1412                 }
1413
1414                 if (!success && !NEEDED(*bmc))
1415                         *bmc |= NEEDED_MASK;
1416
1417                 if (COUNTER(*bmc) == COUNTER_MAX)
1418                         wake_up(&bitmap->overflow_wait);
1419
1420                 (*bmc)--;
1421                 if (*bmc <= 2) {
1422                         bitmap_set_pending(&bitmap->counts, offset);
1423                         bitmap->allclean = 0;
1424                 }
1425                 spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1426                 offset += blocks;
1427                 if (sectors > blocks)
1428                         sectors -= blocks;
1429                 else
1430                         sectors = 0;
1431         }
1432 }
1433 EXPORT_SYMBOL(bitmap_endwrite);
1434
1435 static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1436                                int degraded)
1437 {
1438         bitmap_counter_t *bmc;
1439         int rv;
1440         if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1441                 *blocks = 1024;
1442                 return 1; /* always resync if no bitmap */
1443         }
1444         spin_lock_irq(&bitmap->counts.lock);
1445         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1446         rv = 0;
1447         if (bmc) {
1448                 /* locked */
1449                 if (RESYNC(*bmc))
1450                         rv = 1;
1451                 else if (NEEDED(*bmc)) {
1452                         rv = 1;
1453                         if (!degraded) { /* don't set/clear bits if degraded */
1454                                 *bmc |= RESYNC_MASK;
1455                                 *bmc &= ~NEEDED_MASK;
1456                         }
1457                 }
1458         }
1459         spin_unlock_irq(&bitmap->counts.lock);
1460         return rv;
1461 }
1462
1463 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1464                       int degraded)
1465 {
1466         /* bitmap_start_sync must always report on multiples of whole
1467          * pages, otherwise resync (which is very PAGE_SIZE based) will
1468          * get confused.
1469          * So call __bitmap_start_sync repeatedly (if needed) until
1470          * At least PAGE_SIZE>>9 blocks are covered.
1471          * Return the 'or' of the result.
1472          */
1473         int rv = 0;
1474         sector_t blocks1;
1475
1476         *blocks = 0;
1477         while (*blocks < (PAGE_SIZE>>9)) {
1478                 rv |= __bitmap_start_sync(bitmap, offset,
1479                                           &blocks1, degraded);
1480                 offset += blocks1;
1481                 *blocks += blocks1;
1482         }
1483         return rv;
1484 }
1485 EXPORT_SYMBOL(bitmap_start_sync);
1486
1487 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
1488 {
1489         bitmap_counter_t *bmc;
1490         unsigned long flags;
1491
1492         if (bitmap == NULL) {
1493                 *blocks = 1024;
1494                 return;
1495         }
1496         spin_lock_irqsave(&bitmap->counts.lock, flags);
1497         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1498         if (bmc == NULL)
1499                 goto unlock;
1500         /* locked */
1501         if (RESYNC(*bmc)) {
1502                 *bmc &= ~RESYNC_MASK;
1503
1504                 if (!NEEDED(*bmc) && aborted)
1505                         *bmc |= NEEDED_MASK;
1506                 else {
1507                         if (*bmc <= 2) {
1508                                 bitmap_set_pending(&bitmap->counts, offset);
1509                                 bitmap->allclean = 0;
1510                         }
1511                 }
1512         }
1513  unlock:
1514         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1515 }
1516 EXPORT_SYMBOL(bitmap_end_sync);
1517
1518 void bitmap_close_sync(struct bitmap *bitmap)
1519 {
1520         /* Sync has finished, and any bitmap chunks that weren't synced
1521          * properly have been aborted.  It remains to us to clear the
1522          * RESYNC bit wherever it is still on
1523          */
1524         sector_t sector = 0;
1525         sector_t blocks;
1526         if (!bitmap)
1527                 return;
1528         while (sector < bitmap->mddev->resync_max_sectors) {
1529                 bitmap_end_sync(bitmap, sector, &blocks, 0);
1530                 sector += blocks;
1531         }
1532 }
1533 EXPORT_SYMBOL(bitmap_close_sync);
1534
1535 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1536 {
1537         sector_t s = 0;
1538         sector_t blocks;
1539
1540         if (!bitmap)
1541                 return;
1542         if (sector == 0) {
1543                 bitmap->last_end_sync = jiffies;
1544                 return;
1545         }
1546         if (time_before(jiffies, (bitmap->last_end_sync
1547                                   + bitmap->mddev->bitmap_info.daemon_sleep)))
1548                 return;
1549         wait_event(bitmap->mddev->recovery_wait,
1550                    atomic_read(&bitmap->mddev->recovery_active) == 0);
1551
1552         bitmap->mddev->curr_resync_completed = sector;
1553         set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
1554         sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
1555         s = 0;
1556         while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1557                 bitmap_end_sync(bitmap, s, &blocks, 0);
1558                 s += blocks;
1559         }
1560         bitmap->last_end_sync = jiffies;
1561         sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
1562 }
1563 EXPORT_SYMBOL(bitmap_cond_end_sync);
1564
1565 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1566 {
1567         /* For each chunk covered by any of these sectors, set the
1568          * counter to 2 and possibly set resync_needed.  They should all
1569          * be 0 at this point
1570          */
1571
1572         sector_t secs;
1573         bitmap_counter_t *bmc;
1574         spin_lock_irq(&bitmap->counts.lock);
1575         bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
1576         if (!bmc) {
1577                 spin_unlock_irq(&bitmap->counts.lock);
1578                 return;
1579         }
1580         if (!*bmc) {
1581                 *bmc = 2 | (needed ? NEEDED_MASK : 0);
1582                 bitmap_count_page(&bitmap->counts, offset, 1);
1583                 bitmap_set_pending(&bitmap->counts, offset);
1584                 bitmap->allclean = 0;
1585         }
1586         spin_unlock_irq(&bitmap->counts.lock);
1587 }
1588
1589 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
1590 void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
1591 {
1592         unsigned long chunk;
1593
1594         for (chunk = s; chunk <= e; chunk++) {
1595                 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1596                 bitmap_set_memory_bits(bitmap, sec, 1);
1597                 bitmap_file_set_bit(bitmap, sec);
1598                 if (sec < bitmap->mddev->recovery_cp)
1599                         /* We are asserting that the array is dirty,
1600                          * so move the recovery_cp address back so
1601                          * that it is obvious that it is dirty
1602                          */
1603                         bitmap->mddev->recovery_cp = sec;
1604         }
1605 }
1606
1607 /*
1608  * flush out any pending updates
1609  */
1610 void bitmap_flush(struct mddev *mddev)
1611 {
1612         struct bitmap *bitmap = mddev->bitmap;
1613         long sleep;
1614
1615         if (!bitmap) /* there was no bitmap */
1616                 return;
1617
1618         /* run the daemon_work three time to ensure everything is flushed
1619          * that can be
1620          */
1621         sleep = mddev->bitmap_info.daemon_sleep * 2;
1622         bitmap->daemon_lastrun -= sleep;
1623         bitmap_daemon_work(mddev);
1624         bitmap->daemon_lastrun -= sleep;
1625         bitmap_daemon_work(mddev);
1626         bitmap->daemon_lastrun -= sleep;
1627         bitmap_daemon_work(mddev);
1628         bitmap_update_sb(bitmap);
1629 }
1630
1631 /*
1632  * free memory that was allocated
1633  */
1634 static void bitmap_free(struct bitmap *bitmap)
1635 {
1636         unsigned long k, pages;
1637         struct bitmap_page *bp;
1638
1639         if (!bitmap) /* there was no bitmap */
1640                 return;
1641
1642         if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info)
1643                 md_cluster_stop(bitmap->mddev);
1644
1645         /* Shouldn't be needed - but just in case.... */
1646         wait_event(bitmap->write_wait,
1647                    atomic_read(&bitmap->pending_writes) == 0);
1648
1649         /* release the bitmap file  */
1650         bitmap_file_unmap(&bitmap->storage);
1651
1652         bp = bitmap->counts.bp;
1653         pages = bitmap->counts.pages;
1654
1655         /* free all allocated memory */
1656
1657         if (bp) /* deallocate the page memory */
1658                 for (k = 0; k < pages; k++)
1659                         if (bp[k].map && !bp[k].hijacked)
1660                                 kfree(bp[k].map);
1661         kfree(bp);
1662         kfree(bitmap);
1663 }
1664
1665 void bitmap_destroy(struct mddev *mddev)
1666 {
1667         struct bitmap *bitmap = mddev->bitmap;
1668
1669         if (!bitmap) /* there was no bitmap */
1670                 return;
1671
1672         mutex_lock(&mddev->bitmap_info.mutex);
1673         spin_lock(&mddev->lock);
1674         mddev->bitmap = NULL; /* disconnect from the md device */
1675         spin_unlock(&mddev->lock);
1676         mutex_unlock(&mddev->bitmap_info.mutex);
1677         if (mddev->thread)
1678                 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1679
1680         if (bitmap->sysfs_can_clear)
1681                 sysfs_put(bitmap->sysfs_can_clear);
1682
1683         bitmap_free(bitmap);
1684 }
1685
1686 /*
1687  * initialize the bitmap structure
1688  * if this returns an error, bitmap_destroy must be called to do clean up
1689  */
1690 int bitmap_create(struct mddev *mddev)
1691 {
1692         struct bitmap *bitmap;
1693         sector_t blocks = mddev->resync_max_sectors;
1694         struct file *file = mddev->bitmap_info.file;
1695         int err;
1696         struct kernfs_node *bm = NULL;
1697
1698         BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1699
1700         BUG_ON(file && mddev->bitmap_info.offset);
1701
1702         bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1703         if (!bitmap)
1704                 return -ENOMEM;
1705
1706         spin_lock_init(&bitmap->counts.lock);
1707         atomic_set(&bitmap->pending_writes, 0);
1708         init_waitqueue_head(&bitmap->write_wait);
1709         init_waitqueue_head(&bitmap->overflow_wait);
1710         init_waitqueue_head(&bitmap->behind_wait);
1711
1712         bitmap->mddev = mddev;
1713
1714         if (mddev->kobj.sd)
1715                 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1716         if (bm) {
1717                 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1718                 sysfs_put(bm);
1719         } else
1720                 bitmap->sysfs_can_clear = NULL;
1721
1722         bitmap->storage.file = file;
1723         if (file) {
1724                 get_file(file);
1725                 /* As future accesses to this file will use bmap,
1726                  * and bypass the page cache, we must sync the file
1727                  * first.
1728                  */
1729                 vfs_fsync(file, 1);
1730         }
1731         /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1732         if (!mddev->bitmap_info.external) {
1733                 /*
1734                  * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
1735                  * instructing us to create a new on-disk bitmap instance.
1736                  */
1737                 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
1738                         err = bitmap_new_disk_sb(bitmap);
1739                 else
1740                         err = bitmap_read_sb(bitmap);
1741         } else {
1742                 err = 0;
1743                 if (mddev->bitmap_info.chunksize == 0 ||
1744                     mddev->bitmap_info.daemon_sleep == 0)
1745                         /* chunksize and time_base need to be
1746                          * set first. */
1747                         err = -EINVAL;
1748         }
1749         if (err)
1750                 goto error;
1751
1752         bitmap->daemon_lastrun = jiffies;
1753         err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
1754         if (err)
1755                 goto error;
1756
1757         printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1758                bitmap->counts.pages, bmname(bitmap));
1759
1760         mddev->bitmap = bitmap;
1761         return test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
1762
1763  error:
1764         bitmap_free(bitmap);
1765         return err;
1766 }
1767
1768 int bitmap_load(struct mddev *mddev)
1769 {
1770         int err = 0;
1771         sector_t start = 0;
1772         sector_t sector = 0;
1773         struct bitmap *bitmap = mddev->bitmap;
1774
1775         if (!bitmap)
1776                 goto out;
1777
1778         /* Clear out old bitmap info first:  Either there is none, or we
1779          * are resuming after someone else has possibly changed things,
1780          * so we should forget old cached info.
1781          * All chunks should be clean, but some might need_sync.
1782          */
1783         while (sector < mddev->resync_max_sectors) {
1784                 sector_t blocks;
1785                 bitmap_start_sync(bitmap, sector, &blocks, 0);
1786                 sector += blocks;
1787         }
1788         bitmap_close_sync(bitmap);
1789
1790         if (mddev->degraded == 0
1791             || bitmap->events_cleared == mddev->events)
1792                 /* no need to keep dirty bits to optimise a
1793                  * re-add of a missing device */
1794                 start = mddev->recovery_cp;
1795
1796         mutex_lock(&mddev->bitmap_info.mutex);
1797         err = bitmap_init_from_disk(bitmap, start);
1798         mutex_unlock(&mddev->bitmap_info.mutex);
1799
1800         if (err)
1801                 goto out;
1802         clear_bit(BITMAP_STALE, &bitmap->flags);
1803
1804         /* Kick recovery in case any bits were set */
1805         set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
1806
1807         mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1808         md_wakeup_thread(mddev->thread);
1809
1810         bitmap_update_sb(bitmap);
1811
1812         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1813                 err = -EIO;
1814 out:
1815         return err;
1816 }
1817 EXPORT_SYMBOL_GPL(bitmap_load);
1818
1819 void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
1820 {
1821         unsigned long chunk_kb;
1822         struct bitmap_counts *counts;
1823
1824         if (!bitmap)
1825                 return;
1826
1827         counts = &bitmap->counts;
1828
1829         chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
1830         seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
1831                    "%lu%s chunk",
1832                    counts->pages - counts->missing_pages,
1833                    counts->pages,
1834                    (counts->pages - counts->missing_pages)
1835                    << (PAGE_SHIFT - 10),
1836                    chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
1837                    chunk_kb ? "KB" : "B");
1838         if (bitmap->storage.file) {
1839                 seq_printf(seq, ", file: ");
1840                 seq_path(seq, &bitmap->storage.file->f_path, " \t\n");
1841         }
1842
1843         seq_printf(seq, "\n");
1844 }
1845
1846 int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
1847                   int chunksize, int init)
1848 {
1849         /* If chunk_size is 0, choose an appropriate chunk size.
1850          * Then possibly allocate new storage space.
1851          * Then quiesce, copy bits, replace bitmap, and re-start
1852          *
1853          * This function is called both to set up the initial bitmap
1854          * and to resize the bitmap while the array is active.
1855          * If this happens as a result of the array being resized,
1856          * chunksize will be zero, and we need to choose a suitable
1857          * chunksize, otherwise we use what we are given.
1858          */
1859         struct bitmap_storage store;
1860         struct bitmap_counts old_counts;
1861         unsigned long chunks;
1862         sector_t block;
1863         sector_t old_blocks, new_blocks;
1864         int chunkshift;
1865         int ret = 0;
1866         long pages;
1867         struct bitmap_page *new_bp;
1868
1869         if (chunksize == 0) {
1870                 /* If there is enough space, leave the chunk size unchanged,
1871                  * else increase by factor of two until there is enough space.
1872                  */
1873                 long bytes;
1874                 long space = bitmap->mddev->bitmap_info.space;
1875
1876                 if (space == 0) {
1877                         /* We don't know how much space there is, so limit
1878                          * to current size - in sectors.
1879                          */
1880                         bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
1881                         if (!bitmap->mddev->bitmap_info.external)
1882                                 bytes += sizeof(bitmap_super_t);
1883                         space = DIV_ROUND_UP(bytes, 512);
1884                         bitmap->mddev->bitmap_info.space = space;
1885                 }
1886                 chunkshift = bitmap->counts.chunkshift;
1887                 chunkshift--;
1888                 do {
1889                         /* 'chunkshift' is shift from block size to chunk size */
1890                         chunkshift++;
1891                         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1892                         bytes = DIV_ROUND_UP(chunks, 8);
1893                         if (!bitmap->mddev->bitmap_info.external)
1894                                 bytes += sizeof(bitmap_super_t);
1895                 } while (bytes > (space << 9));
1896         } else
1897                 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
1898
1899         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1900         memset(&store, 0, sizeof(store));
1901         if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
1902                 ret = bitmap_storage_alloc(&store, chunks,
1903                                            !bitmap->mddev->bitmap_info.external,
1904                                            bitmap->cluster_slot);
1905         if (ret)
1906                 goto err;
1907
1908         pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
1909
1910         new_bp = kzalloc(pages * sizeof(*new_bp), GFP_KERNEL);
1911         ret = -ENOMEM;
1912         if (!new_bp) {
1913                 bitmap_file_unmap(&store);
1914                 goto err;
1915         }
1916
1917         if (!init)
1918                 bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
1919
1920         store.file = bitmap->storage.file;
1921         bitmap->storage.file = NULL;
1922
1923         if (store.sb_page && bitmap->storage.sb_page)
1924                 memcpy(page_address(store.sb_page),
1925                        page_address(bitmap->storage.sb_page),
1926                        sizeof(bitmap_super_t));
1927         bitmap_file_unmap(&bitmap->storage);
1928         bitmap->storage = store;
1929
1930         old_counts = bitmap->counts;
1931         bitmap->counts.bp = new_bp;
1932         bitmap->counts.pages = pages;
1933         bitmap->counts.missing_pages = pages;
1934         bitmap->counts.chunkshift = chunkshift;
1935         bitmap->counts.chunks = chunks;
1936         bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
1937                                                      BITMAP_BLOCK_SHIFT);
1938
1939         blocks = min(old_counts.chunks << old_counts.chunkshift,
1940                      chunks << chunkshift);
1941
1942         spin_lock_irq(&bitmap->counts.lock);
1943         for (block = 0; block < blocks; ) {
1944                 bitmap_counter_t *bmc_old, *bmc_new;
1945                 int set;
1946
1947                 bmc_old = bitmap_get_counter(&old_counts, block,
1948                                              &old_blocks, 0);
1949                 set = bmc_old && NEEDED(*bmc_old);
1950
1951                 if (set) {
1952                         bmc_new = bitmap_get_counter(&bitmap->counts, block,
1953                                                      &new_blocks, 1);
1954                         if (*bmc_new == 0) {
1955                                 /* need to set on-disk bits too. */
1956                                 sector_t end = block + new_blocks;
1957                                 sector_t start = block >> chunkshift;
1958                                 start <<= chunkshift;
1959                                 while (start < end) {
1960                                         bitmap_file_set_bit(bitmap, block);
1961                                         start += 1 << chunkshift;
1962                                 }
1963                                 *bmc_new = 2;
1964                                 bitmap_count_page(&bitmap->counts,
1965                                                   block, 1);
1966                                 bitmap_set_pending(&bitmap->counts,
1967                                                    block);
1968                         }
1969                         *bmc_new |= NEEDED_MASK;
1970                         if (new_blocks < old_blocks)
1971                                 old_blocks = new_blocks;
1972                 }
1973                 block += old_blocks;
1974         }
1975
1976         if (!init) {
1977                 int i;
1978                 while (block < (chunks << chunkshift)) {
1979                         bitmap_counter_t *bmc;
1980                         bmc = bitmap_get_counter(&bitmap->counts, block,
1981                                                  &new_blocks, 1);
1982                         if (bmc) {
1983                                 /* new space.  It needs to be resynced, so
1984                                  * we set NEEDED_MASK.
1985                                  */
1986                                 if (*bmc == 0) {
1987                                         *bmc = NEEDED_MASK | 2;
1988                                         bitmap_count_page(&bitmap->counts,
1989                                                           block, 1);
1990                                         bitmap_set_pending(&bitmap->counts,
1991                                                            block);
1992                                 }
1993                         }
1994                         block += new_blocks;
1995                 }
1996                 for (i = 0; i < bitmap->storage.file_pages; i++)
1997                         set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1998         }
1999         spin_unlock_irq(&bitmap->counts.lock);
2000
2001         if (!init) {
2002                 bitmap_unplug(bitmap);
2003                 bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
2004         }
2005         ret = 0;
2006 err:
2007         return ret;
2008 }
2009 EXPORT_SYMBOL_GPL(bitmap_resize);
2010
2011 static ssize_t
2012 location_show(struct mddev *mddev, char *page)
2013 {
2014         ssize_t len;
2015         if (mddev->bitmap_info.file)
2016                 len = sprintf(page, "file");
2017         else if (mddev->bitmap_info.offset)
2018                 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
2019         else
2020                 len = sprintf(page, "none");
2021         len += sprintf(page+len, "\n");
2022         return len;
2023 }
2024
2025 static ssize_t
2026 location_store(struct mddev *mddev, const char *buf, size_t len)
2027 {
2028
2029         if (mddev->pers) {
2030                 if (!mddev->pers->quiesce)
2031                         return -EBUSY;
2032                 if (mddev->recovery || mddev->sync_thread)
2033                         return -EBUSY;
2034         }
2035
2036         if (mddev->bitmap || mddev->bitmap_info.file ||
2037             mddev->bitmap_info.offset) {
2038                 /* bitmap already configured.  Only option is to clear it */
2039                 if (strncmp(buf, "none", 4) != 0)
2040                         return -EBUSY;
2041                 if (mddev->pers) {
2042                         mddev->pers->quiesce(mddev, 1);
2043                         bitmap_destroy(mddev);
2044                         mddev->pers->quiesce(mddev, 0);
2045                 }
2046                 mddev->bitmap_info.offset = 0;
2047                 if (mddev->bitmap_info.file) {
2048                         struct file *f = mddev->bitmap_info.file;
2049                         mddev->bitmap_info.file = NULL;
2050                         fput(f);
2051                 }
2052         } else {
2053                 /* No bitmap, OK to set a location */
2054                 long long offset;
2055                 if (strncmp(buf, "none", 4) == 0)
2056                         /* nothing to be done */;
2057                 else if (strncmp(buf, "file:", 5) == 0) {
2058                         /* Not supported yet */
2059                         return -EINVAL;
2060                 } else {
2061                         int rv;
2062                         if (buf[0] == '+')
2063                                 rv = kstrtoll(buf+1, 10, &offset);
2064                         else
2065                                 rv = kstrtoll(buf, 10, &offset);
2066                         if (rv)
2067                                 return rv;
2068                         if (offset == 0)
2069                                 return -EINVAL;
2070                         if (mddev->bitmap_info.external == 0 &&
2071                             mddev->major_version == 0 &&
2072                             offset != mddev->bitmap_info.default_offset)
2073                                 return -EINVAL;
2074                         mddev->bitmap_info.offset = offset;
2075                         if (mddev->pers) {
2076                                 mddev->pers->quiesce(mddev, 1);
2077                                 rv = bitmap_create(mddev);
2078                                 if (!rv)
2079                                         rv = bitmap_load(mddev);
2080                                 if (rv) {
2081                                         bitmap_destroy(mddev);
2082                                         mddev->bitmap_info.offset = 0;
2083                                 }
2084                                 mddev->pers->quiesce(mddev, 0);
2085                                 if (rv)
2086                                         return rv;
2087                         }
2088                 }
2089         }
2090         if (!mddev->external) {
2091                 /* Ensure new bitmap info is stored in
2092                  * metadata promptly.
2093                  */
2094                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2095                 md_wakeup_thread(mddev->thread);
2096         }
2097         return len;
2098 }
2099
2100 static struct md_sysfs_entry bitmap_location =
2101 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
2102
2103 /* 'bitmap/space' is the space available at 'location' for the
2104  * bitmap.  This allows the kernel to know when it is safe to
2105  * resize the bitmap to match a resized array.
2106  */
2107 static ssize_t
2108 space_show(struct mddev *mddev, char *page)
2109 {
2110         return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2111 }
2112
2113 static ssize_t
2114 space_store(struct mddev *mddev, const char *buf, size_t len)
2115 {
2116         unsigned long sectors;
2117         int rv;
2118
2119         rv = kstrtoul(buf, 10, &sectors);
2120         if (rv)
2121                 return rv;
2122
2123         if (sectors == 0)
2124                 return -EINVAL;
2125
2126         if (mddev->bitmap &&
2127             sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
2128                 return -EFBIG; /* Bitmap is too big for this small space */
2129
2130         /* could make sure it isn't too big, but that isn't really
2131          * needed - user-space should be careful.
2132          */
2133         mddev->bitmap_info.space = sectors;
2134         return len;
2135 }
2136
2137 static struct md_sysfs_entry bitmap_space =
2138 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2139
2140 static ssize_t
2141 timeout_show(struct mddev *mddev, char *page)
2142 {
2143         ssize_t len;
2144         unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2145         unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2146
2147         len = sprintf(page, "%lu", secs);
2148         if (jifs)
2149                 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2150         len += sprintf(page+len, "\n");
2151         return len;
2152 }
2153
2154 static ssize_t
2155 timeout_store(struct mddev *mddev, const char *buf, size_t len)
2156 {
2157         /* timeout can be set at any time */
2158         unsigned long timeout;
2159         int rv = strict_strtoul_scaled(buf, &timeout, 4);
2160         if (rv)
2161                 return rv;
2162
2163         /* just to make sure we don't overflow... */
2164         if (timeout >= LONG_MAX / HZ)
2165                 return -EINVAL;
2166
2167         timeout = timeout * HZ / 10000;
2168
2169         if (timeout >= MAX_SCHEDULE_TIMEOUT)
2170                 timeout = MAX_SCHEDULE_TIMEOUT-1;
2171         if (timeout < 1)
2172                 timeout = 1;
2173         mddev->bitmap_info.daemon_sleep = timeout;
2174         if (mddev->thread) {
2175                 /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
2176                  * the bitmap is all clean and we don't need to
2177                  * adjust the timeout right now
2178                  */
2179                 if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
2180                         mddev->thread->timeout = timeout;
2181                         md_wakeup_thread(mddev->thread);
2182                 }
2183         }
2184         return len;
2185 }
2186
2187 static struct md_sysfs_entry bitmap_timeout =
2188 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2189
2190 static ssize_t
2191 backlog_show(struct mddev *mddev, char *page)
2192 {
2193         return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2194 }
2195
2196 static ssize_t
2197 backlog_store(struct mddev *mddev, const char *buf, size_t len)
2198 {
2199         unsigned long backlog;
2200         int rv = kstrtoul(buf, 10, &backlog);
2201         if (rv)
2202                 return rv;
2203         if (backlog > COUNTER_MAX)
2204                 return -EINVAL;
2205         mddev->bitmap_info.max_write_behind = backlog;
2206         return len;
2207 }
2208
2209 static struct md_sysfs_entry bitmap_backlog =
2210 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2211
2212 static ssize_t
2213 chunksize_show(struct mddev *mddev, char *page)
2214 {
2215         return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2216 }
2217
2218 static ssize_t
2219 chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2220 {
2221         /* Can only be changed when no bitmap is active */
2222         int rv;
2223         unsigned long csize;
2224         if (mddev->bitmap)
2225                 return -EBUSY;
2226         rv = kstrtoul(buf, 10, &csize);
2227         if (rv)
2228                 return rv;
2229         if (csize < 512 ||
2230             !is_power_of_2(csize))
2231                 return -EINVAL;
2232         mddev->bitmap_info.chunksize = csize;
2233         return len;
2234 }
2235
2236 static struct md_sysfs_entry bitmap_chunksize =
2237 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2238
2239 static ssize_t metadata_show(struct mddev *mddev, char *page)
2240 {
2241         if (mddev_is_clustered(mddev))
2242                 return sprintf(page, "clustered\n");
2243         return sprintf(page, "%s\n", (mddev->bitmap_info.external
2244                                       ? "external" : "internal"));
2245 }
2246
2247 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2248 {
2249         if (mddev->bitmap ||
2250             mddev->bitmap_info.file ||
2251             mddev->bitmap_info.offset)
2252                 return -EBUSY;
2253         if (strncmp(buf, "external", 8) == 0)
2254                 mddev->bitmap_info.external = 1;
2255         else if ((strncmp(buf, "internal", 8) == 0) ||
2256                         (strncmp(buf, "clustered", 9) == 0))
2257                 mddev->bitmap_info.external = 0;
2258         else
2259                 return -EINVAL;
2260         return len;
2261 }
2262
2263 static struct md_sysfs_entry bitmap_metadata =
2264 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2265
2266 static ssize_t can_clear_show(struct mddev *mddev, char *page)
2267 {
2268         int len;
2269         spin_lock(&mddev->lock);
2270         if (mddev->bitmap)
2271                 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
2272                                              "false" : "true"));
2273         else
2274                 len = sprintf(page, "\n");
2275         spin_unlock(&mddev->lock);
2276         return len;
2277 }
2278
2279 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2280 {
2281         if (mddev->bitmap == NULL)
2282                 return -ENOENT;
2283         if (strncmp(buf, "false", 5) == 0)
2284                 mddev->bitmap->need_sync = 1;
2285         else if (strncmp(buf, "true", 4) == 0) {
2286                 if (mddev->degraded)
2287                         return -EBUSY;
2288                 mddev->bitmap->need_sync = 0;
2289         } else
2290                 return -EINVAL;
2291         return len;
2292 }
2293
2294 static struct md_sysfs_entry bitmap_can_clear =
2295 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2296
2297 static ssize_t
2298 behind_writes_used_show(struct mddev *mddev, char *page)
2299 {
2300         ssize_t ret;
2301         spin_lock(&mddev->lock);
2302         if (mddev->bitmap == NULL)
2303                 ret = sprintf(page, "0\n");
2304         else
2305                 ret = sprintf(page, "%lu\n",
2306                               mddev->bitmap->behind_writes_used);
2307         spin_unlock(&mddev->lock);
2308         return ret;
2309 }
2310
2311 static ssize_t
2312 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2313 {
2314         if (mddev->bitmap)
2315                 mddev->bitmap->behind_writes_used = 0;
2316         return len;
2317 }
2318
2319 static struct md_sysfs_entry max_backlog_used =
2320 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
2321        behind_writes_used_show, behind_writes_used_reset);
2322
2323 static struct attribute *md_bitmap_attrs[] = {
2324         &bitmap_location.attr,
2325         &bitmap_space.attr,
2326         &bitmap_timeout.attr,
2327         &bitmap_backlog.attr,
2328         &bitmap_chunksize.attr,
2329         &bitmap_metadata.attr,
2330         &bitmap_can_clear.attr,
2331         &max_backlog_used.attr,
2332         NULL
2333 };
2334 struct attribute_group md_bitmap_group = {
2335         .name = "bitmap",
2336         .attrs = md_bitmap_attrs,
2337 };
2338