Merge branch 'nfs-for-3.3' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
[firefly-linux-kernel-4.4.55.git] / kernel / power / swap.c
1 /*
2  * linux/kernel/power/swap.c
3  *
4  * This file provides functions for reading the suspend image from
5  * and writing it to a swap partition.
6  *
7  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
8  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9  * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com>
10  *
11  * This file is released under the GPLv2.
12  *
13  */
14
15 #include <linux/module.h>
16 #include <linux/file.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/genhd.h>
20 #include <linux/device.h>
21 #include <linux/bio.h>
22 #include <linux/blkdev.h>
23 #include <linux/swap.h>
24 #include <linux/swapops.h>
25 #include <linux/pm.h>
26 #include <linux/slab.h>
27 #include <linux/lzo.h>
28 #include <linux/vmalloc.h>
29 #include <linux/cpumask.h>
30 #include <linux/atomic.h>
31 #include <linux/kthread.h>
32 #include <linux/crc32.h>
33
34 #include "power.h"
35
/* Signature mark_swapfiles() stores in the swap header's sig field. */
#define HIBERNATE_SIG	"S1SUSPEND"
37
38 /*
39  *      The swap map is a data structure used for keeping track of each page
40  *      written to a swap partition.  It consists of many swap_map_page
41  *      structures that contain each an array of MAP_PAGE_ENTRIES swap entries.
42  *      These structures are stored on the swap and linked together with the
43  *      help of the .next_swap member.
44  *
45  *      The swap map is created during suspend.  The swap map pages are
46  *      allocated and populated one at a time, so we only need one memory
47  *      page to set up the entire structure.
48  *
49  *      During resume we pick up all swap_map_page structures into a list.
50  */
51
52 #define MAP_PAGE_ENTRIES        (PAGE_SIZE / sizeof(sector_t) - 1)
53
54 struct swap_map_page {
55         sector_t entries[MAP_PAGE_ENTRIES];
56         sector_t next_swap;
57 };
58
/* List node holding one swap map page read back by get_swap_reader(). */
struct swap_map_page_list {
	/* Page-sized buffer the map page was read into. */
	struct swap_map_page *map;
	/* Following node; left NULL for the last node of the list. */
	struct swap_map_page_list *next;
};
63
64 /**
65  *      The swap_map_handle structure is used for handling swap in
66  *      a file-alike way
67  */
68
69 struct swap_map_handle {
70         struct swap_map_page *cur;
71         struct swap_map_page_list *maps;
72         sector_t cur_swap;
73         sector_t first_sector;
74         unsigned int k;
75         unsigned long nr_free_pages, written;
76         u32 crc32;
77 };
78
79 struct swsusp_header {
80         char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
81                       sizeof(u32)];
82         u32     crc32;
83         sector_t image;
84         unsigned int flags;     /* Flags to pass to the "boot" kernel */
85         char    orig_sig[10];
86         char    sig[10];
87 } __attribute__((packed));
88
89 static struct swsusp_header *swsusp_header;
90
91 /**
92  *      The following functions are used for tracing the allocated
93  *      swap pages, so that they can be freed in case of an error.
94  */
95
96 struct swsusp_extent {
97         struct rb_node node;
98         unsigned long start;
99         unsigned long end;
100 };
101
102 static struct rb_root swsusp_extents = RB_ROOT;
103
104 static int swsusp_extents_insert(unsigned long swap_offset)
105 {
106         struct rb_node **new = &(swsusp_extents.rb_node);
107         struct rb_node *parent = NULL;
108         struct swsusp_extent *ext;
109
110         /* Figure out where to put the new node */
111         while (*new) {
112                 ext = container_of(*new, struct swsusp_extent, node);
113                 parent = *new;
114                 if (swap_offset < ext->start) {
115                         /* Try to merge */
116                         if (swap_offset == ext->start - 1) {
117                                 ext->start--;
118                                 return 0;
119                         }
120                         new = &((*new)->rb_left);
121                 } else if (swap_offset > ext->end) {
122                         /* Try to merge */
123                         if (swap_offset == ext->end + 1) {
124                                 ext->end++;
125                                 return 0;
126                         }
127                         new = &((*new)->rb_right);
128                 } else {
129                         /* It already is in the tree */
130                         return -EINVAL;
131                 }
132         }
133         /* Add the new node and rebalance the tree. */
134         ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
135         if (!ext)
136                 return -ENOMEM;
137
138         ext->start = swap_offset;
139         ext->end = swap_offset;
140         rb_link_node(&ext->node, parent, new);
141         rb_insert_color(&ext->node, &swsusp_extents);
142         return 0;
143 }
144
145 /**
146  *      alloc_swapdev_block - allocate a swap page and register that it has
147  *      been allocated, so that it can be freed in case of an error.
148  */
149
150 sector_t alloc_swapdev_block(int swap)
151 {
152         unsigned long offset;
153
154         offset = swp_offset(get_swap_page_of_type(swap));
155         if (offset) {
156                 if (swsusp_extents_insert(offset))
157                         swap_free(swp_entry(swap, offset));
158                 else
159                         return swapdev_block(swap, offset);
160         }
161         return 0;
162 }
163
164 /**
165  *      free_all_swap_pages - free swap pages allocated for saving image data.
166  *      It also frees the extents used to register which swap entries had been
167  *      allocated.
168  */
169
170 void free_all_swap_pages(int swap)
171 {
172         struct rb_node *node;
173
174         while ((node = swsusp_extents.rb_node)) {
175                 struct swsusp_extent *ext;
176                 unsigned long offset;
177
178                 ext = container_of(node, struct swsusp_extent, node);
179                 rb_erase(node, &swsusp_extents);
180                 for (offset = ext->start; offset <= ext->end; offset++)
181                         swap_free(swp_entry(swap, offset));
182
183                 kfree(ext);
184         }
185 }
186
187 int swsusp_swap_in_use(void)
188 {
189         return (swsusp_extents.rb_node != NULL);
190 }
191
/*
 * General things
 */

/* Swap type found by swsusp_swap_check(). */
static unsigned short root_swap = 0xffff;
/* Block device filled in by swap_type_of() in swsusp_swap_check(). */
struct block_device *hib_resume_bdev;
198
199 /*
200  * Saving part
201  */
202
203 static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
204 {
205         int error;
206
207         hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
208         if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
209             !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
210                 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
211                 memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
212                 swsusp_header->image = handle->first_sector;
213                 swsusp_header->flags = flags;
214                 if (flags & SF_CRC32_MODE)
215                         swsusp_header->crc32 = handle->crc32;
216                 error = hib_bio_write_page(swsusp_resume_block,
217                                         swsusp_header, NULL);
218         } else {
219                 printk(KERN_ERR "PM: Swap header not found!\n");
220                 error = -ENODEV;
221         }
222         return error;
223 }
224
225 /**
226  *      swsusp_swap_check - check if the resume device is a swap device
227  *      and get its index (if so)
228  *
229  *      This is called before saving image
230  */
231 static int swsusp_swap_check(void)
232 {
233         int res;
234
235         res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
236                         &hib_resume_bdev);
237         if (res < 0)
238                 return res;
239
240         root_swap = res;
241         res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
242         if (res)
243                 return res;
244
245         res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
246         if (res < 0)
247                 blkdev_put(hib_resume_bdev, FMODE_WRITE);
248
249         return res;
250 }
251
252 /**
253  *      write_page - Write one page to given swap location.
254  *      @buf:           Address we're writing.
255  *      @offset:        Offset of the swap page we're writing to.
256  *      @bio_chain:     Link the next write BIO here
257  */
258
259 static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
260 {
261         void *src;
262         int ret;
263
264         if (!offset)
265                 return -ENOSPC;
266
267         if (bio_chain) {
268                 src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
269                 if (src) {
270                         copy_page(src, buf);
271                 } else {
272                         ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
273                         if (ret)
274                                 return ret;
275                         src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
276                         if (src) {
277                                 copy_page(src, buf);
278                         } else {
279                                 WARN_ON_ONCE(1);
280                                 bio_chain = NULL;       /* Go synchronous */
281                                 src = buf;
282                         }
283                 }
284         } else {
285                 src = buf;
286         }
287         return hib_bio_write_page(offset, src, bio_chain);
288 }
289
290 static void release_swap_writer(struct swap_map_handle *handle)
291 {
292         if (handle->cur)
293                 free_page((unsigned long)handle->cur);
294         handle->cur = NULL;
295 }
296
297 static int get_swap_writer(struct swap_map_handle *handle)
298 {
299         int ret;
300
301         ret = swsusp_swap_check();
302         if (ret) {
303                 if (ret != -ENOSPC)
304                         printk(KERN_ERR "PM: Cannot find swap device, try "
305                                         "swapon -a.\n");
306                 return ret;
307         }
308         handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
309         if (!handle->cur) {
310                 ret = -ENOMEM;
311                 goto err_close;
312         }
313         handle->cur_swap = alloc_swapdev_block(root_swap);
314         if (!handle->cur_swap) {
315                 ret = -ENOSPC;
316                 goto err_rel;
317         }
318         handle->k = 0;
319         handle->nr_free_pages = nr_free_pages() >> 1;
320         handle->written = 0;
321         handle->first_sector = handle->cur_swap;
322         return 0;
323 err_rel:
324         release_swap_writer(handle);
325 err_close:
326         swsusp_close(FMODE_WRITE);
327         return ret;
328 }
329
330 static int swap_write_page(struct swap_map_handle *handle, void *buf,
331                                 struct bio **bio_chain)
332 {
333         int error = 0;
334         sector_t offset;
335
336         if (!handle->cur)
337                 return -EINVAL;
338         offset = alloc_swapdev_block(root_swap);
339         error = write_page(buf, offset, bio_chain);
340         if (error)
341                 return error;
342         handle->cur->entries[handle->k++] = offset;
343         if (handle->k >= MAP_PAGE_ENTRIES) {
344                 offset = alloc_swapdev_block(root_swap);
345                 if (!offset)
346                         return -ENOSPC;
347                 handle->cur->next_swap = offset;
348                 error = write_page(handle->cur, handle->cur_swap, bio_chain);
349                 if (error)
350                         goto out;
351                 clear_page(handle->cur);
352                 handle->cur_swap = offset;
353                 handle->k = 0;
354         }
355         if (bio_chain && ++handle->written > handle->nr_free_pages) {
356                 error = hib_wait_on_bio_chain(bio_chain);
357                 if (error)
358                         goto out;
359                 handle->written = 0;
360         }
361  out:
362         return error;
363 }
364
365 static int flush_swap_writer(struct swap_map_handle *handle)
366 {
367         if (handle->cur && handle->cur_swap)
368                 return write_page(handle->cur, handle->cur_swap, NULL);
369         else
370                 return -EINVAL;
371 }
372
373 static int swap_writer_finish(struct swap_map_handle *handle,
374                 unsigned int flags, int error)
375 {
376         if (!error) {
377                 flush_swap_writer(handle);
378                 printk(KERN_INFO "PM: S");
379                 error = mark_swapfiles(handle, flags);
380                 printk("|\n");
381         }
382
383         if (error)
384                 free_all_swap_pages(root_swap);
385         release_swap_writer(handle);
386         swsusp_close(FMODE_WRITE);
387
388         return error;
389 }
390
/* Size of the length field stored in front of each compressed chunk. */
#define LZO_HEADER	sizeof(size_t)

/* Pages/bytes of uncompressed data compressed in one go. */
#define LZO_UNC_PAGES	32
#define LZO_UNC_SIZE	(LZO_UNC_PAGES * PAGE_SIZE)

/* Worst-case pages/bytes needed for one compressed chunk and its header. */
#define LZO_CMP_PAGES	DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \
			             LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE	(LZO_CMP_PAGES * PAGE_SIZE)

/* Upper bound on the number of compression/decompression threads. */
#define LZO_THREADS	3

/* Upper bound on the number of pages used for read buffering. */
#define LZO_READ_PAGES	(MAP_PAGE_ENTRIES * 8)
408
409
410 /**
411  *      save_image - save the suspend image data
412  */
413
414 static int save_image(struct swap_map_handle *handle,
415                       struct snapshot_handle *snapshot,
416                       unsigned int nr_to_write)
417 {
418         unsigned int m;
419         int ret;
420         int nr_pages;
421         int err2;
422         struct bio *bio;
423         struct timeval start;
424         struct timeval stop;
425
426         printk(KERN_INFO "PM: Saving image data pages (%u pages) ...     ",
427                 nr_to_write);
428         m = nr_to_write / 100;
429         if (!m)
430                 m = 1;
431         nr_pages = 0;
432         bio = NULL;
433         do_gettimeofday(&start);
434         while (1) {
435                 ret = snapshot_read_next(snapshot);
436                 if (ret <= 0)
437                         break;
438                 ret = swap_write_page(handle, data_of(*snapshot), &bio);
439                 if (ret)
440                         break;
441                 if (!(nr_pages % m))
442                         printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
443                 nr_pages++;
444         }
445         err2 = hib_wait_on_bio_chain(&bio);
446         do_gettimeofday(&stop);
447         if (!ret)
448                 ret = err2;
449         if (!ret)
450                 printk(KERN_CONT "\b\b\b\bdone\n");
451         else
452                 printk(KERN_CONT "\n");
453         swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
454         return ret;
455 }
456
457 /**
458  * Structure used for CRC32.
459  */
460 struct crc_data {
461         struct task_struct *thr;                  /* thread */
462         atomic_t ready;                           /* ready to start flag */
463         atomic_t stop;                            /* ready to stop flag */
464         unsigned run_threads;                     /* nr current threads */
465         wait_queue_head_t go;                     /* start crc update */
466         wait_queue_head_t done;                   /* crc update done */
467         u32 *crc32;                               /* points to handle's crc32 */
468         size_t *unc_len[LZO_THREADS];             /* uncompressed lengths */
469         unsigned char *unc[LZO_THREADS];          /* uncompressed data */
470 };
471
472 /**
473  * CRC32 update function that runs in its own thread.
474  */
475 static int crc32_threadfn(void *data)
476 {
477         struct crc_data *d = data;
478         unsigned i;
479
480         while (1) {
481                 wait_event(d->go, atomic_read(&d->ready) ||
482                                   kthread_should_stop());
483                 if (kthread_should_stop()) {
484                         d->thr = NULL;
485                         atomic_set(&d->stop, 1);
486                         wake_up(&d->done);
487                         break;
488                 }
489                 atomic_set(&d->ready, 0);
490
491                 for (i = 0; i < d->run_threads; i++)
492                         *d->crc32 = crc32_le(*d->crc32,
493                                              d->unc[i], *d->unc_len[i]);
494                 atomic_set(&d->stop, 1);
495                 wake_up(&d->done);
496         }
497         return 0;
498 }
499 /**
500  * Structure used for LZO data compression.
501  */
502 struct cmp_data {
503         struct task_struct *thr;                  /* thread */
504         atomic_t ready;                           /* ready to start flag */
505         atomic_t stop;                            /* ready to stop flag */
506         int ret;                                  /* return code */
507         wait_queue_head_t go;                     /* start compression */
508         wait_queue_head_t done;                   /* compression done */
509         size_t unc_len;                           /* uncompressed length */
510         size_t cmp_len;                           /* compressed length */
511         unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
512         unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
513         unsigned char wrk[LZO1X_1_MEM_COMPRESS];  /* compression workspace */
514 };
515
516 /**
517  * Compression function that runs in its own thread.
518  */
519 static int lzo_compress_threadfn(void *data)
520 {
521         struct cmp_data *d = data;
522
523         while (1) {
524                 wait_event(d->go, atomic_read(&d->ready) ||
525                                   kthread_should_stop());
526                 if (kthread_should_stop()) {
527                         d->thr = NULL;
528                         d->ret = -1;
529                         atomic_set(&d->stop, 1);
530                         wake_up(&d->done);
531                         break;
532                 }
533                 atomic_set(&d->ready, 0);
534
535                 d->ret = lzo1x_1_compress(d->unc, d->unc_len,
536                                           d->cmp + LZO_HEADER, &d->cmp_len,
537                                           d->wrk);
538                 atomic_set(&d->stop, 1);
539                 wake_up(&d->done);
540         }
541         return 0;
542 }
543
544 /**
545  * save_image_lzo - Save the suspend image data compressed with LZO.
546  * @handle: Swap mam handle to use for saving the image.
547  * @snapshot: Image to read data from.
548  * @nr_to_write: Number of pages to save.
549  */
550 static int save_image_lzo(struct swap_map_handle *handle,
551                           struct snapshot_handle *snapshot,
552                           unsigned int nr_to_write)
553 {
554         unsigned int m;
555         int ret = 0;
556         int nr_pages;
557         int err2;
558         struct bio *bio;
559         struct timeval start;
560         struct timeval stop;
561         size_t off;
562         unsigned thr, run_threads, nr_threads;
563         unsigned char *page = NULL;
564         struct cmp_data *data = NULL;
565         struct crc_data *crc = NULL;
566
567         /*
568          * We'll limit the number of threads for compression to limit memory
569          * footprint.
570          */
571         nr_threads = num_online_cpus() - 1;
572         nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
573
574         page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
575         if (!page) {
576                 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
577                 ret = -ENOMEM;
578                 goto out_clean;
579         }
580
581         data = vmalloc(sizeof(*data) * nr_threads);
582         if (!data) {
583                 printk(KERN_ERR "PM: Failed to allocate LZO data\n");
584                 ret = -ENOMEM;
585                 goto out_clean;
586         }
587         for (thr = 0; thr < nr_threads; thr++)
588                 memset(&data[thr], 0, offsetof(struct cmp_data, go));
589
590         crc = kmalloc(sizeof(*crc), GFP_KERNEL);
591         if (!crc) {
592                 printk(KERN_ERR "PM: Failed to allocate crc\n");
593                 ret = -ENOMEM;
594                 goto out_clean;
595         }
596         memset(crc, 0, offsetof(struct crc_data, go));
597
598         /*
599          * Start the compression threads.
600          */
601         for (thr = 0; thr < nr_threads; thr++) {
602                 init_waitqueue_head(&data[thr].go);
603                 init_waitqueue_head(&data[thr].done);
604
605                 data[thr].thr = kthread_run(lzo_compress_threadfn,
606                                             &data[thr],
607                                             "image_compress/%u", thr);
608                 if (IS_ERR(data[thr].thr)) {
609                         data[thr].thr = NULL;
610                         printk(KERN_ERR
611                                "PM: Cannot start compression threads\n");
612                         ret = -ENOMEM;
613                         goto out_clean;
614                 }
615         }
616
617         /*
618          * Adjust number of free pages after all allocations have been done.
619          * We don't want to run out of pages when writing.
620          */
621         handle->nr_free_pages = nr_free_pages() >> 1;
622
623         /*
624          * Start the CRC32 thread.
625          */
626         init_waitqueue_head(&crc->go);
627         init_waitqueue_head(&crc->done);
628
629         handle->crc32 = 0;
630         crc->crc32 = &handle->crc32;
631         for (thr = 0; thr < nr_threads; thr++) {
632                 crc->unc[thr] = data[thr].unc;
633                 crc->unc_len[thr] = &data[thr].unc_len;
634         }
635
636         crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
637         if (IS_ERR(crc->thr)) {
638                 crc->thr = NULL;
639                 printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
640                 ret = -ENOMEM;
641                 goto out_clean;
642         }
643
644         printk(KERN_INFO
645                 "PM: Using %u thread(s) for compression.\n"
646                 "PM: Compressing and saving image data (%u pages) ...     ",
647                 nr_threads, nr_to_write);
648         m = nr_to_write / 100;
649         if (!m)
650                 m = 1;
651         nr_pages = 0;
652         bio = NULL;
653         do_gettimeofday(&start);
654         for (;;) {
655                 for (thr = 0; thr < nr_threads; thr++) {
656                         for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
657                                 ret = snapshot_read_next(snapshot);
658                                 if (ret < 0)
659                                         goto out_finish;
660
661                                 if (!ret)
662                                         break;
663
664                                 memcpy(data[thr].unc + off,
665                                        data_of(*snapshot), PAGE_SIZE);
666
667                                 if (!(nr_pages % m))
668                                         printk(KERN_CONT "\b\b\b\b%3d%%",
669                                                nr_pages / m);
670                                 nr_pages++;
671                         }
672                         if (!off)
673                                 break;
674
675                         data[thr].unc_len = off;
676
677                         atomic_set(&data[thr].ready, 1);
678                         wake_up(&data[thr].go);
679                 }
680
681                 if (!thr)
682                         break;
683
684                 crc->run_threads = thr;
685                 atomic_set(&crc->ready, 1);
686                 wake_up(&crc->go);
687
688                 for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
689                         wait_event(data[thr].done,
690                                    atomic_read(&data[thr].stop));
691                         atomic_set(&data[thr].stop, 0);
692
693                         ret = data[thr].ret;
694
695                         if (ret < 0) {
696                                 printk(KERN_ERR "PM: LZO compression failed\n");
697                                 goto out_finish;
698                         }
699
700                         if (unlikely(!data[thr].cmp_len ||
701                                      data[thr].cmp_len >
702                                      lzo1x_worst_compress(data[thr].unc_len))) {
703                                 printk(KERN_ERR
704                                        "PM: Invalid LZO compressed length\n");
705                                 ret = -1;
706                                 goto out_finish;
707                         }
708
709                         *(size_t *)data[thr].cmp = data[thr].cmp_len;
710
711                         /*
712                          * Given we are writing one page at a time to disk, we
713                          * copy that much from the buffer, although the last
714                          * bit will likely be smaller than full page. This is
715                          * OK - we saved the length of the compressed data, so
716                          * any garbage at the end will be discarded when we
717                          * read it.
718                          */
719                         for (off = 0;
720                              off < LZO_HEADER + data[thr].cmp_len;
721                              off += PAGE_SIZE) {
722                                 memcpy(page, data[thr].cmp + off, PAGE_SIZE);
723
724                                 ret = swap_write_page(handle, page, &bio);
725                                 if (ret)
726                                         goto out_finish;
727                         }
728                 }
729
730                 wait_event(crc->done, atomic_read(&crc->stop));
731                 atomic_set(&crc->stop, 0);
732         }
733
734 out_finish:
735         err2 = hib_wait_on_bio_chain(&bio);
736         do_gettimeofday(&stop);
737         if (!ret)
738                 ret = err2;
739         if (!ret) {
740                 printk(KERN_CONT "\b\b\b\bdone\n");
741         } else {
742                 printk(KERN_CONT "\n");
743         }
744         swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
745 out_clean:
746         if (crc) {
747                 if (crc->thr)
748                         kthread_stop(crc->thr);
749                 kfree(crc);
750         }
751         if (data) {
752                 for (thr = 0; thr < nr_threads; thr++)
753                         if (data[thr].thr)
754                                 kthread_stop(data[thr].thr);
755                 vfree(data);
756         }
757         if (page) free_page((unsigned long)page);
758
759         return ret;
760 }
761
762 /**
763  *      enough_swap - Make sure we have enough swap to save the image.
764  *
765  *      Returns TRUE or FALSE after checking the total amount of swap
766  *      space avaiable from the resume partition.
767  */
768
769 static int enough_swap(unsigned int nr_pages, unsigned int flags)
770 {
771         unsigned int free_swap = count_swap_pages(root_swap, 1);
772         unsigned int required;
773
774         pr_debug("PM: Free swap pages: %u\n", free_swap);
775
776         required = PAGES_FOR_IO + ((flags & SF_NOCOMPRESS_MODE) ?
777                 nr_pages : (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1);
778         return free_swap > required;
779 }
780
781 /**
782  *      swsusp_write - Write entire image and metadata.
783  *      @flags: flags to pass to the "boot" kernel in the image header
784  *
785  *      It is important _NOT_ to umount filesystems at this point. We want
786  *      them synced (in case something goes wrong) but we DO not want to mark
787  *      filesystem clean: it is not. (And it does not matter, if we resume
788  *      correctly, we'll mark system clean, anyway.)
789  */
790
791 int swsusp_write(unsigned int flags)
792 {
793         struct swap_map_handle handle;
794         struct snapshot_handle snapshot;
795         struct swsusp_info *header;
796         unsigned long pages;
797         int error;
798
799         pages = snapshot_get_image_size();
800         error = get_swap_writer(&handle);
801         if (error) {
802                 printk(KERN_ERR "PM: Cannot get swap writer\n");
803                 return error;
804         }
805         if (!enough_swap(pages, flags)) {
806                 printk(KERN_ERR "PM: Not enough free swap\n");
807                 error = -ENOSPC;
808                 goto out_finish;
809         }
810         memset(&snapshot, 0, sizeof(struct snapshot_handle));
811         error = snapshot_read_next(&snapshot);
812         if (error < PAGE_SIZE) {
813                 if (error >= 0)
814                         error = -EFAULT;
815
816                 goto out_finish;
817         }
818         header = (struct swsusp_info *)data_of(snapshot);
819         error = swap_write_page(&handle, header, NULL);
820         if (!error) {
821                 error = (flags & SF_NOCOMPRESS_MODE) ?
822                         save_image(&handle, &snapshot, pages - 1) :
823                         save_image_lzo(&handle, &snapshot, pages - 1);
824         }
825 out_finish:
826         error = swap_writer_finish(&handle, flags, error);
827         return error;
828 }
829
/**
 *	The following functions allow us to read data using a swap map
 *	in a file-like way
 */
834
835 static void release_swap_reader(struct swap_map_handle *handle)
836 {
837         struct swap_map_page_list *tmp;
838
839         while (handle->maps) {
840                 if (handle->maps->map)
841                         free_page((unsigned long)handle->maps->map);
842                 tmp = handle->maps;
843                 handle->maps = handle->maps->next;
844                 kfree(tmp);
845         }
846         handle->cur = NULL;
847 }
848
849 static int get_swap_reader(struct swap_map_handle *handle,
850                 unsigned int *flags_p)
851 {
852         int error;
853         struct swap_map_page_list *tmp, *last;
854         sector_t offset;
855
856         *flags_p = swsusp_header->flags;
857
858         if (!swsusp_header->image) /* how can this happen? */
859                 return -EINVAL;
860
861         handle->cur = NULL;
862         last = handle->maps = NULL;
863         offset = swsusp_header->image;
864         while (offset) {
865                 tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL);
866                 if (!tmp) {
867                         release_swap_reader(handle);
868                         return -ENOMEM;
869                 }
870                 memset(tmp, 0, sizeof(*tmp));
871                 if (!handle->maps)
872                         handle->maps = tmp;
873                 if (last)
874                         last->next = tmp;
875                 last = tmp;
876
877                 tmp->map = (struct swap_map_page *)
878                            __get_free_page(__GFP_WAIT | __GFP_HIGH);
879                 if (!tmp->map) {
880                         release_swap_reader(handle);
881                         return -ENOMEM;
882                 }
883
884                 error = hib_bio_read_page(offset, tmp->map, NULL);
885                 if (error) {
886                         release_swap_reader(handle);
887                         return error;
888                 }
889                 offset = tmp->map->next_swap;
890         }
891         handle->k = 0;
892         handle->cur = handle->maps->map;
893         return 0;
894 }
895
896 static int swap_read_page(struct swap_map_handle *handle, void *buf,
897                                 struct bio **bio_chain)
898 {
899         sector_t offset;
900         int error;
901         struct swap_map_page_list *tmp;
902
903         if (!handle->cur)
904                 return -EINVAL;
905         offset = handle->cur->entries[handle->k];
906         if (!offset)
907                 return -EFAULT;
908         error = hib_bio_read_page(offset, buf, bio_chain);
909         if (error)
910                 return error;
911         if (++handle->k >= MAP_PAGE_ENTRIES) {
912                 handle->k = 0;
913                 free_page((unsigned long)handle->maps->map);
914                 tmp = handle->maps;
915                 handle->maps = handle->maps->next;
916                 kfree(tmp);
917                 if (!handle->maps)
918                         release_swap_reader(handle);
919                 else
920                         handle->cur = handle->maps->map;
921         }
922         return error;
923 }
924
/*
 * Tear down the reader state held in @handle by releasing whatever swap map
 * pages are still queued.  Always returns 0.
 */
static int swap_reader_finish(struct swap_map_handle *handle)
{
        release_swap_reader(handle);
        return 0;
}
931
932 /**
933  *      load_image - load the image using the swap map handle
934  *      @handle and the snapshot handle @snapshot
935  *      (assume there are @nr_pages pages to load)
936  */
937
938 static int load_image(struct swap_map_handle *handle,
939                       struct snapshot_handle *snapshot,
940                       unsigned int nr_to_read)
941 {
942         unsigned int m;
943         int ret = 0;
944         struct timeval start;
945         struct timeval stop;
946         struct bio *bio;
947         int err2;
948         unsigned nr_pages;
949
950         printk(KERN_INFO "PM: Loading image data pages (%u pages) ...     ",
951                 nr_to_read);
952         m = nr_to_read / 100;
953         if (!m)
954                 m = 1;
955         nr_pages = 0;
956         bio = NULL;
957         do_gettimeofday(&start);
958         for ( ; ; ) {
959                 ret = snapshot_write_next(snapshot);
960                 if (ret <= 0)
961                         break;
962                 ret = swap_read_page(handle, data_of(*snapshot), &bio);
963                 if (ret)
964                         break;
965                 if (snapshot->sync_read)
966                         ret = hib_wait_on_bio_chain(&bio);
967                 if (ret)
968                         break;
969                 if (!(nr_pages % m))
970                         printk("\b\b\b\b%3d%%", nr_pages / m);
971                 nr_pages++;
972         }
973         err2 = hib_wait_on_bio_chain(&bio);
974         do_gettimeofday(&stop);
975         if (!ret)
976                 ret = err2;
977         if (!ret) {
978                 printk("\b\b\b\bdone\n");
979                 snapshot_write_finalize(snapshot);
980                 if (!snapshot_image_loaded(snapshot))
981                         ret = -ENODATA;
982         } else
983                 printk("\n");
984         swsusp_show_speed(&start, &stop, nr_to_read, "Read");
985         return ret;
986 }
987
988 /**
989  * Structure used for LZO data decompression.
990  */
991 struct dec_data {
992         struct task_struct *thr;                  /* thread */
993         atomic_t ready;                           /* ready to start flag */
994         atomic_t stop;                            /* ready to stop flag */
995         int ret;                                  /* return code */
996         wait_queue_head_t go;                     /* start decompression */
997         wait_queue_head_t done;                   /* decompression done */
998         size_t unc_len;                           /* uncompressed length */
999         size_t cmp_len;                           /* compressed length */
1000         unsigned char unc[LZO_UNC_SIZE];          /* uncompressed buffer */
1001         unsigned char cmp[LZO_CMP_SIZE];          /* compressed buffer */
1002 };
1003
1004 /**
1005  * Deompression function that runs in its own thread.
1006  */
1007 static int lzo_decompress_threadfn(void *data)
1008 {
1009         struct dec_data *d = data;
1010
1011         while (1) {
1012                 wait_event(d->go, atomic_read(&d->ready) ||
1013                                   kthread_should_stop());
1014                 if (kthread_should_stop()) {
1015                         d->thr = NULL;
1016                         d->ret = -1;
1017                         atomic_set(&d->stop, 1);
1018                         wake_up(&d->done);
1019                         break;
1020                 }
1021                 atomic_set(&d->ready, 0);
1022
1023                 d->unc_len = LZO_UNC_SIZE;
1024                 d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
1025                                                d->unc, &d->unc_len);
1026                 atomic_set(&d->stop, 1);
1027                 wake_up(&d->done);
1028         }
1029         return 0;
1030 }
1031
1032 /**
1033  * load_image_lzo - Load compressed image data and decompress them with LZO.
1034  * @handle: Swap map handle to use for loading data.
1035  * @snapshot: Image to copy uncompressed data into.
1036  * @nr_to_read: Number of pages to load.
1037  */
1038 static int load_image_lzo(struct swap_map_handle *handle,
1039                           struct snapshot_handle *snapshot,
1040                           unsigned int nr_to_read)
1041 {
1042         unsigned int m;
1043         int ret = 0;
1044         int eof = 0;
1045         struct bio *bio;
1046         struct timeval start;
1047         struct timeval stop;
1048         unsigned nr_pages;
1049         size_t off;
1050         unsigned i, thr, run_threads, nr_threads;
1051         unsigned ring = 0, pg = 0, ring_size = 0,
1052                  have = 0, want, need, asked = 0;
1053         unsigned long read_pages;
1054         unsigned char **page = NULL;
1055         struct dec_data *data = NULL;
1056         struct crc_data *crc = NULL;
1057
1058         /*
1059          * We'll limit the number of threads for decompression to limit memory
1060          * footprint.
1061          */
1062         nr_threads = num_online_cpus() - 1;
1063         nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
1064
1065         page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
1066         if (!page) {
1067                 printk(KERN_ERR "PM: Failed to allocate LZO page\n");
1068                 ret = -ENOMEM;
1069                 goto out_clean;
1070         }
1071
1072         data = vmalloc(sizeof(*data) * nr_threads);
1073         if (!data) {
1074                 printk(KERN_ERR "PM: Failed to allocate LZO data\n");
1075                 ret = -ENOMEM;
1076                 goto out_clean;
1077         }
1078         for (thr = 0; thr < nr_threads; thr++)
1079                 memset(&data[thr], 0, offsetof(struct dec_data, go));
1080
1081         crc = kmalloc(sizeof(*crc), GFP_KERNEL);
1082         if (!crc) {
1083                 printk(KERN_ERR "PM: Failed to allocate crc\n");
1084                 ret = -ENOMEM;
1085                 goto out_clean;
1086         }
1087         memset(crc, 0, offsetof(struct crc_data, go));
1088
1089         /*
1090          * Start the decompression threads.
1091          */
1092         for (thr = 0; thr < nr_threads; thr++) {
1093                 init_waitqueue_head(&data[thr].go);
1094                 init_waitqueue_head(&data[thr].done);
1095
1096                 data[thr].thr = kthread_run(lzo_decompress_threadfn,
1097                                             &data[thr],
1098                                             "image_decompress/%u", thr);
1099                 if (IS_ERR(data[thr].thr)) {
1100                         data[thr].thr = NULL;
1101                         printk(KERN_ERR
1102                                "PM: Cannot start decompression threads\n");
1103                         ret = -ENOMEM;
1104                         goto out_clean;
1105                 }
1106         }
1107
1108         /*
1109          * Start the CRC32 thread.
1110          */
1111         init_waitqueue_head(&crc->go);
1112         init_waitqueue_head(&crc->done);
1113
1114         handle->crc32 = 0;
1115         crc->crc32 = &handle->crc32;
1116         for (thr = 0; thr < nr_threads; thr++) {
1117                 crc->unc[thr] = data[thr].unc;
1118                 crc->unc_len[thr] = &data[thr].unc_len;
1119         }
1120
1121         crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
1122         if (IS_ERR(crc->thr)) {
1123                 crc->thr = NULL;
1124                 printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
1125                 ret = -ENOMEM;
1126                 goto out_clean;
1127         }
1128
1129         /*
1130          * Adjust number of pages for read buffering, in case we are short.
1131          */
1132         read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1;
1133         read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES);
1134
1135         for (i = 0; i < read_pages; i++) {
1136                 page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
1137                                                   __GFP_WAIT | __GFP_HIGH :
1138                                                   __GFP_WAIT);
1139                 if (!page[i]) {
1140                         if (i < LZO_CMP_PAGES) {
1141                                 ring_size = i;
1142                                 printk(KERN_ERR
1143                                        "PM: Failed to allocate LZO pages\n");
1144                                 ret = -ENOMEM;
1145                                 goto out_clean;
1146                         } else {
1147                                 break;
1148                         }
1149                 }
1150         }
1151         want = ring_size = i;
1152
1153         printk(KERN_INFO
1154                 "PM: Using %u thread(s) for decompression.\n"
1155                 "PM: Loading and decompressing image data (%u pages) ...     ",
1156                 nr_threads, nr_to_read);
1157         m = nr_to_read / 100;
1158         if (!m)
1159                 m = 1;
1160         nr_pages = 0;
1161         bio = NULL;
1162         do_gettimeofday(&start);
1163
1164         ret = snapshot_write_next(snapshot);
1165         if (ret <= 0)
1166                 goto out_finish;
1167
1168         for(;;) {
1169                 for (i = 0; !eof && i < want; i++) {
1170                         ret = swap_read_page(handle, page[ring], &bio);
1171                         if (ret) {
1172                                 /*
1173                                  * On real read error, finish. On end of data,
1174                                  * set EOF flag and just exit the read loop.
1175                                  */
1176                                 if (handle->cur &&
1177                                     handle->cur->entries[handle->k]) {
1178                                         goto out_finish;
1179                                 } else {
1180                                         eof = 1;
1181                                         break;
1182                                 }
1183                         }
1184                         if (++ring >= ring_size)
1185                                 ring = 0;
1186                 }
1187                 asked += i;
1188                 want -= i;
1189
1190                 /*
1191                  * We are out of data, wait for some more.
1192                  */
1193                 if (!have) {
1194                         if (!asked)
1195                                 break;
1196
1197                         ret = hib_wait_on_bio_chain(&bio);
1198                         if (ret)
1199                                 goto out_finish;
1200                         have += asked;
1201                         asked = 0;
1202                         if (eof)
1203                                 eof = 2;
1204                 }
1205
1206                 if (crc->run_threads) {
1207                         wait_event(crc->done, atomic_read(&crc->stop));
1208                         atomic_set(&crc->stop, 0);
1209                         crc->run_threads = 0;
1210                 }
1211
1212                 for (thr = 0; have && thr < nr_threads; thr++) {
1213                         data[thr].cmp_len = *(size_t *)page[pg];
1214                         if (unlikely(!data[thr].cmp_len ||
1215                                      data[thr].cmp_len >
1216                                      lzo1x_worst_compress(LZO_UNC_SIZE))) {
1217                                 printk(KERN_ERR
1218                                        "PM: Invalid LZO compressed length\n");
1219                                 ret = -1;
1220                                 goto out_finish;
1221                         }
1222
1223                         need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
1224                                             PAGE_SIZE);
1225                         if (need > have) {
1226                                 if (eof > 1) {
1227                                         ret = -1;
1228                                         goto out_finish;
1229                                 }
1230                                 break;
1231                         }
1232
1233                         for (off = 0;
1234                              off < LZO_HEADER + data[thr].cmp_len;
1235                              off += PAGE_SIZE) {
1236                                 memcpy(data[thr].cmp + off,
1237                                        page[pg], PAGE_SIZE);
1238                                 have--;
1239                                 want++;
1240                                 if (++pg >= ring_size)
1241                                         pg = 0;
1242                         }
1243
1244                         atomic_set(&data[thr].ready, 1);
1245                         wake_up(&data[thr].go);
1246                 }
1247
1248                 /*
1249                  * Wait for more data while we are decompressing.
1250                  */
1251                 if (have < LZO_CMP_PAGES && asked) {
1252                         ret = hib_wait_on_bio_chain(&bio);
1253                         if (ret)
1254                                 goto out_finish;
1255                         have += asked;
1256                         asked = 0;
1257                         if (eof)
1258                                 eof = 2;
1259                 }
1260
1261                 for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
1262                         wait_event(data[thr].done,
1263                                    atomic_read(&data[thr].stop));
1264                         atomic_set(&data[thr].stop, 0);
1265
1266                         ret = data[thr].ret;
1267
1268                         if (ret < 0) {
1269                                 printk(KERN_ERR
1270                                        "PM: LZO decompression failed\n");
1271                                 goto out_finish;
1272                         }
1273
1274                         if (unlikely(!data[thr].unc_len ||
1275                                      data[thr].unc_len > LZO_UNC_SIZE ||
1276                                      data[thr].unc_len & (PAGE_SIZE - 1))) {
1277                                 printk(KERN_ERR
1278                                        "PM: Invalid LZO uncompressed length\n");
1279                                 ret = -1;
1280                                 goto out_finish;
1281                         }
1282
1283                         for (off = 0;
1284                              off < data[thr].unc_len; off += PAGE_SIZE) {
1285                                 memcpy(data_of(*snapshot),
1286                                        data[thr].unc + off, PAGE_SIZE);
1287
1288                                 if (!(nr_pages % m))
1289                                         printk("\b\b\b\b%3d%%", nr_pages / m);
1290                                 nr_pages++;
1291
1292                                 ret = snapshot_write_next(snapshot);
1293                                 if (ret <= 0) {
1294                                         crc->run_threads = thr + 1;
1295                                         atomic_set(&crc->ready, 1);
1296                                         wake_up(&crc->go);
1297                                         goto out_finish;
1298                                 }
1299                         }
1300                 }
1301
1302                 crc->run_threads = thr;
1303                 atomic_set(&crc->ready, 1);
1304                 wake_up(&crc->go);
1305         }
1306
1307 out_finish:
1308         if (crc->run_threads) {
1309                 wait_event(crc->done, atomic_read(&crc->stop));
1310                 atomic_set(&crc->stop, 0);
1311         }
1312         do_gettimeofday(&stop);
1313         if (!ret) {
1314                 printk("\b\b\b\bdone\n");
1315                 snapshot_write_finalize(snapshot);
1316                 if (!snapshot_image_loaded(snapshot))
1317                         ret = -ENODATA;
1318                 if (!ret) {
1319                         if (swsusp_header->flags & SF_CRC32_MODE) {
1320                                 if(handle->crc32 != swsusp_header->crc32) {
1321                                         printk(KERN_ERR
1322                                                "PM: Invalid image CRC32!\n");
1323                                         ret = -ENODATA;
1324                                 }
1325                         }
1326                 }
1327         } else
1328                 printk("\n");
1329         swsusp_show_speed(&start, &stop, nr_to_read, "Read");
1330 out_clean:
1331         for (i = 0; i < ring_size; i++)
1332                 free_page((unsigned long)page[i]);
1333         if (crc) {
1334                 if (crc->thr)
1335                         kthread_stop(crc->thr);
1336                 kfree(crc);
1337         }
1338         if (data) {
1339                 for (thr = 0; thr < nr_threads; thr++)
1340                         if (data[thr].thr)
1341                                 kthread_stop(data[thr].thr);
1342                 vfree(data);
1343         }
1344         if (page) vfree(page);
1345
1346         return ret;
1347 }
1348
1349 /**
1350  *      swsusp_read - read the hibernation image.
1351  *      @flags_p: flags passed by the "frozen" kernel in the image header should
1352  *                be written into this memory location
1353  */
1354
1355 int swsusp_read(unsigned int *flags_p)
1356 {
1357         int error;
1358         struct swap_map_handle handle;
1359         struct snapshot_handle snapshot;
1360         struct swsusp_info *header;
1361
1362         memset(&snapshot, 0, sizeof(struct snapshot_handle));
1363         error = snapshot_write_next(&snapshot);
1364         if (error < PAGE_SIZE)
1365                 return error < 0 ? error : -EFAULT;
1366         header = (struct swsusp_info *)data_of(snapshot);
1367         error = get_swap_reader(&handle, flags_p);
1368         if (error)
1369                 goto end;
1370         if (!error)
1371                 error = swap_read_page(&handle, header, NULL);
1372         if (!error) {
1373                 error = (*flags_p & SF_NOCOMPRESS_MODE) ?
1374                         load_image(&handle, &snapshot, header->pages - 1) :
1375                         load_image_lzo(&handle, &snapshot, header->pages - 1);
1376         }
1377         swap_reader_finish(&handle);
1378 end:
1379         if (!error)
1380                 pr_debug("PM: Image successfully loaded\n");
1381         else
1382                 pr_debug("PM: Error %d resuming\n", error);
1383         return error;
1384 }
1385
1386 /**
1387  *      swsusp_check - Check for swsusp signature in the resume device
1388  */
1389
1390 int swsusp_check(void)
1391 {
1392         int error;
1393
1394         hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
1395                                             FMODE_READ, NULL);
1396         if (!IS_ERR(hib_resume_bdev)) {
1397                 set_blocksize(hib_resume_bdev, PAGE_SIZE);
1398                 clear_page(swsusp_header);
1399                 error = hib_bio_read_page(swsusp_resume_block,
1400                                         swsusp_header, NULL);
1401                 if (error)
1402                         goto put;
1403
1404                 if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
1405                         memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
1406                         /* Reset swap signature now */
1407                         error = hib_bio_write_page(swsusp_resume_block,
1408                                                 swsusp_header, NULL);
1409                 } else {
1410                         error = -EINVAL;
1411                 }
1412
1413 put:
1414                 if (error)
1415                         blkdev_put(hib_resume_bdev, FMODE_READ);
1416                 else
1417                         pr_debug("PM: Image signature found, resuming\n");
1418         } else {
1419                 error = PTR_ERR(hib_resume_bdev);
1420         }
1421
1422         if (error)
1423                 pr_debug("PM: Image not found (code %d)\n", error);
1424
1425         return error;
1426 }
1427
1428 /**
1429  *      swsusp_close - close swap device.
1430  */
1431
1432 void swsusp_close(fmode_t mode)
1433 {
1434         if (IS_ERR(hib_resume_bdev)) {
1435                 pr_debug("PM: Image device not initialised\n");
1436                 return;
1437         }
1438
1439         blkdev_put(hib_resume_bdev, mode);
1440 }
1441
1442 static int swsusp_header_init(void)
1443 {
1444         swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
1445         if (!swsusp_header)
1446                 panic("Could not allocate memory for swsusp_header\n");
1447         return 0;
1448 }
1449
1450 core_initcall(swsusp_header_init);