2 * VMware Balloon driver.
4 * Copyright (C) 2000-2013, VMware, Inc. All Rights Reserved.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; version 2 of the License and no later version.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
13 * NON INFRINGEMENT. See the GNU General Public License for more
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 * Maintained by: Xavier Deguillard <xdeguillard@vmware.com>
21 * Philip Moltmann <moltmann@vmware.com>
25 * This is VMware physical memory management driver for Linux. The driver
26 * acts like a "balloon" that can be inflated to reclaim physical pages by
27 * reserving them in the guest and invalidating them in the monitor,
28 * freeing up the underlying machine pages so they can be allocated to
29 * other guests. The balloon can also be deflated to allow the guest to
30 * use more physical memory. Higher level policies can control the sizes
31 * of balloons in VMs in order to manage physical memory resources.
35 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 #include <linux/types.h>
38 #include <linux/kernel.h>
40 #include <linux/vmalloc.h>
41 #include <linux/sched.h>
42 #include <linux/module.h>
43 #include <linux/workqueue.h>
44 #include <linux/debugfs.h>
45 #include <linux/seq_file.h>
46 #include <asm/hypervisor.h>
48 MODULE_AUTHOR("VMware, Inc.");
49 MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
50 MODULE_VERSION("1.3.2.0-k");
51 MODULE_ALIAS("dmi:*:svnVMware*:*");
52 MODULE_ALIAS("vmware_vmmemctl");
53 MODULE_LICENSE("GPL");
/*
 * Various constants controlling rate of inflating/deflating balloon,
 * measured in pages.
 */

/*
 * Rate of allocating memory when there is no memory pressure
 * (driver performs non-sleeping allocations).
 */
#define VMW_BALLOON_NOSLEEP_ALLOC_MAX	16384U

/*
 * Rates of memory allocation when guest experiences memory pressure
 * (driver performs sleeping allocations).
 */
#define VMW_BALLOON_RATE_ALLOC_MIN	512U
#define VMW_BALLOON_RATE_ALLOC_MAX	2048U
#define VMW_BALLOON_RATE_ALLOC_INC	16U

/*
 * Rates for releasing pages while deflating balloon.
 */
#define VMW_BALLOON_RATE_FREE_MIN	512U
#define VMW_BALLOON_RATE_FREE_MAX	16384U
#define VMW_BALLOON_RATE_FREE_INC	16U

/*
 * When guest is under memory pressure, use a reduced page allocation
 * rate for next several cycles.
 */
#define VMW_BALLOON_SLOW_CYCLES		4

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
 * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use
 * __GFP_NOWARN, to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep. This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)

/* Maximum number of page allocations without yielding processor */
#define VMW_BALLOON_YIELD_THRESHOLD	1024

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16
/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS		= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS	= (1 << 2)
};

/* Full capability set this driver advertises to the host at START time. */
#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS)

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available,
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands.
 */
#define VMW_BALLOON_CMD_START		0
#define VMW_BALLOON_CMD_GET_TARGET	1
#define VMW_BALLOON_CMD_LOCK		2
#define VMW_BALLOON_CMD_UNLOCK		3
#define VMW_BALLOON_CMD_GUEST_ID	4
#define VMW_BALLOON_CMD_BATCHED_LOCK	6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK	7

/* Status codes returned by the hypervisor for balloon commands. */
#define VMW_BALLOON_SUCCESS			0
#define VMW_BALLOON_FAILURE			-1
#define VMW_BALLOON_ERROR_CMD_INVALID		1
#define VMW_BALLOON_ERROR_PPN_INVALID		2
#define VMW_BALLOON_ERROR_PPN_LOCKED		3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED		4
#define VMW_BALLOON_ERROR_PPN_PINNED		5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED		6
#define VMW_BALLOON_ERROR_RESET			7
#define VMW_BALLOON_ERROR_BUSY			8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)
/* Batch page description */

/*
 * Layout of a page in the batch page:
 *
 * +-------------+----------+--------+
 * |             |          |        |
 * | Page number | Reserved | Status |
 * |             |          |        |
 * +-------------+----------+--------+
 *
 * For now only 4K pages are supported, but we can easily support large pages
 * by using bits in the reserved field.
 *
 * The reserved field should be set to 0.
 */
#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
180 struct vmballoon_batch_page {
181 u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
184 static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
186 return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
189 static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
192 return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
195 static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
198 batch->pages[idx] = pa;
/*
 * Issue a balloon backdoor command via the hypervisor I/O port.
 * Evaluates to the status returned in EAX. For the START command the
 * host returns its capability set in ECX, which is copied to @result.
 * NOTE(review): register constraints reconstructed to match the visible
 * inputs (magic in EAX, command in ECX, port in EDX) — confirm against
 * the upstream backdoor protocol definition.
 */
#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
({								\
	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
	__asm__ __volatile__ ("inl %%dx" :			\
		"=a"(__status),					\
		"=c"(__dummy1),					\
		"=d"(__dummy2),					\
		"=b"(result),					\
		"=S"(__dummy3) :				\
		"0"(VMW_BALLOON_HV_MAGIC),			\
		"1"(VMW_BALLOON_CMD_##cmd),			\
		"2"(VMW_BALLOON_HV_PORT),			\
		"3"(arg1),					\
		"4"(arg2) :					\
		"memory");					\
	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
		result = __dummy1;				\
	__status;						\
})
#ifdef CONFIG_DEBUG_FS
/* Counters exported through debugfs; compiled out otherwise. */
struct vmballoon_stats {
	unsigned int timer;

	/* allocation statistics */
	unsigned int alloc;
	unsigned int alloc_fail;
	unsigned int sleep_alloc;
	unsigned int sleep_alloc_fail;
	unsigned int refused_alloc;
	unsigned int refused_free;
	unsigned int free;

	/* monitor operations */
	unsigned int lock;
	unsigned int lock_fail;
	unsigned int unlock;
	unsigned int unlock_fail;
	unsigned int target;
	unsigned int target_fail;
	unsigned int start;
	unsigned int start_fail;
	unsigned int guest_type;
	unsigned int guest_type_fail;
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif
struct vmballoon;

/*
 * Per-mode operations: basic (one page per hypervisor call) vs batched
 * (a whole batch page per call). @add_page stages a page at slot @idx;
 * @lock/@unlock hand the staged pages to / reclaim them from the host.
 */
struct vmballoon_ops {
	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
	int (*lock)(struct vmballoon *b, unsigned int num_pages,
		    unsigned int *target);
	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
		      unsigned int *target);
};
266 /* list of reserved physical pages */
267 struct list_head pages;
269 /* transient list of non-balloonable pages */
270 struct list_head refused_pages;
271 unsigned int n_refused_pages;
273 /* balloon size in pages */
280 /* adjustment rates (pages per second) */
281 unsigned int rate_alloc;
282 unsigned int rate_free;
284 /* slowdown page allocations for next few cycles */
285 unsigned int slow_allocation_cycles;
287 unsigned long capabilities;
289 struct vmballoon_batch_page *batch_page;
290 unsigned int batch_max_pages;
293 const struct vmballoon_ops *ops;
295 #ifdef CONFIG_DEBUG_FS
297 struct vmballoon_stats stats;
299 /* debugfs file exporting statistics */
300 struct dentry *dbg_entry;
303 struct sysinfo sysinfo;
305 struct delayed_work dwork;
308 static struct vmballoon balloon;
311 * Send "start" command to the host, communicating supported version
314 static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
316 unsigned long status, capabilities, dummy = 0;
318 STATS_INC(b->stats.start);
320 status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
323 case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
324 b->capabilities = capabilities;
326 case VMW_BALLOON_SUCCESS:
327 b->capabilities = VMW_BALLOON_BASIC_CMDS;
331 pr_debug("%s - failed, hv returns %ld\n", __func__, status);
332 STATS_INC(b->stats.start_fail);
336 static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
339 case VMW_BALLOON_SUCCESS:
342 case VMW_BALLOON_ERROR_RESET:
343 b->reset_required = true;
352 * Communicate guest type to the host so that it can adjust ballooning
353 * algorithm to the one most appropriate for the guest. This command
354 * is normally issued after sending "start" command and is part of
355 * standard reset sequence.
357 static bool vmballoon_send_guest_id(struct vmballoon *b)
359 unsigned long status, dummy = 0;
361 status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
364 STATS_INC(b->stats.guest_type);
366 if (vmballoon_check_status(b, status))
369 pr_debug("%s - failed, hv returns %ld\n", __func__, status);
370 STATS_INC(b->stats.guest_type_fail);
375 * Retrieve desired balloon size from the host.
377 static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
379 unsigned long status;
380 unsigned long target;
382 unsigned long dummy = 0;
386 * si_meminfo() is cheap. Moreover, we want to provide dynamic
387 * max balloon size later. So let us call si_meminfo() every
390 si_meminfo(&b->sysinfo);
391 limit = b->sysinfo.totalram;
393 /* Ensure limit fits in 32-bits */
394 limit32 = (u32)limit;
395 if (limit != limit32)
399 STATS_INC(b->stats.target);
401 status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
402 if (vmballoon_check_status(b, status)) {
403 *new_target = target;
407 pr_debug("%s - failed, hv returns %ld\n", __func__, status);
408 STATS_INC(b->stats.target_fail);
413 * Notify the host about allocated page so that host can use it without
414 * fear that guest will need it. Host may reject some pages, we need to
415 * check the return value and maybe submit a different page.
417 static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
418 unsigned int *hv_status, unsigned int *target)
420 unsigned long status, dummy = 0;
427 STATS_INC(b->stats.lock);
429 *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
430 if (vmballoon_check_status(b, status))
433 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
434 STATS_INC(b->stats.lock_fail);
438 static int vmballoon_send_batched_lock(struct vmballoon *b,
439 unsigned int num_pages, unsigned int *target)
441 unsigned long status;
442 unsigned long pfn = page_to_pfn(b->page);
444 STATS_INC(b->stats.lock);
446 status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages, *target);
447 if (vmballoon_check_status(b, status))
450 pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
451 STATS_INC(b->stats.lock_fail);
456 * Notify the host that guest intends to release given page back into
457 * the pool of available (to the guest) pages.
459 static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
460 unsigned int *target)
462 unsigned long status, dummy = 0;
469 STATS_INC(b->stats.unlock);
471 status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
472 if (vmballoon_check_status(b, status))
475 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
476 STATS_INC(b->stats.unlock_fail);
480 static bool vmballoon_send_batched_unlock(struct vmballoon *b,
481 unsigned int num_pages, unsigned int *target)
483 unsigned long status;
484 unsigned long pfn = page_to_pfn(b->page);
486 STATS_INC(b->stats.unlock);
488 status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages, *target);
489 if (vmballoon_check_status(b, status))
492 pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
493 STATS_INC(b->stats.unlock_fail);
498 * Quickly release all pages allocated for the balloon. This function is
499 * called when host decides to "reset" balloon for one reason or another.
500 * Unlike normal "deflate" we do not (shall not) notify host of the pages
503 static void vmballoon_pop(struct vmballoon *b)
505 struct page *page, *next;
506 unsigned int count = 0;
508 list_for_each_entry_safe(page, next, &b->pages, lru) {
509 list_del(&page->lru);
511 STATS_INC(b->stats.free);
514 if (++count >= b->rate_free) {
520 if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
522 vunmap(b->batch_page);
525 __free_page(b->page);
530 * Notify the host of a ballooned page. If host rejects the page put it on the
531 * refuse list, those refused page are then released at the end of the
534 static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
535 unsigned int *target)
537 int locked, hv_status;
538 struct page *page = b->page;
540 locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
543 STATS_INC(b->stats.refused_alloc);
545 if (hv_status == VMW_BALLOON_ERROR_RESET ||
546 hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
552 * Place page on the list of non-balloonable pages
553 * and retry allocation, unless we already accumulated
554 * too many of them, in which case take a breather.
556 if (b->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
557 b->n_refused_pages++;
558 list_add(&page->lru, &b->refused_pages);
565 /* track allocated page */
566 list_add(&page->lru, &b->pages);
568 /* update balloon size */
574 static int vmballoon_lock_batched_page(struct vmballoon *b,
575 unsigned int num_pages, unsigned int *target)
579 locked = vmballoon_send_batched_lock(b, num_pages, target);
581 for (i = 0; i < num_pages; i++) {
582 u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
583 struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
591 for (i = 0; i < num_pages; i++) {
592 u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
593 struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
595 locked = vmballoon_batch_get_status(b->batch_page, i);
598 case VMW_BALLOON_SUCCESS:
599 list_add(&p->lru, &b->pages);
602 case VMW_BALLOON_ERROR_PPN_PINNED:
603 case VMW_BALLOON_ERROR_PPN_INVALID:
604 if (b->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
605 list_add(&p->lru, &b->refused_pages);
606 b->n_refused_pages++;
610 case VMW_BALLOON_ERROR_RESET:
611 case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
615 /* This should never happen */
624 * Release the page allocated for the balloon. Note that we first notify
625 * the host so it can make sure the page will be available for the guest
628 static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
629 unsigned int *target)
631 struct page *page = b->page;
633 if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
634 list_add(&page->lru, &b->pages);
638 /* deallocate page */
640 STATS_INC(b->stats.free);
642 /* update balloon size */
648 static int vmballoon_unlock_batched_page(struct vmballoon *b,
649 unsigned int num_pages, unsigned int *target)
651 int locked, i, ret = 0;
654 hv_success = vmballoon_send_batched_unlock(b, num_pages, target);
658 for (i = 0; i < num_pages; i++) {
659 u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
660 struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
662 locked = vmballoon_batch_get_status(b->batch_page, i);
663 if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
665 * That page wasn't successfully unlocked by the
666 * hypervisor, re-add it to the list of pages owned by
667 * the balloon driver.
669 list_add(&p->lru, &b->pages);
671 /* deallocate page */
673 STATS_INC(b->stats.free);
675 /* update balloon size */
684 * Release pages that were allocated while attempting to inflate the
685 * balloon but were refused by the host for one reason or another.
687 static void vmballoon_release_refused_pages(struct vmballoon *b)
689 struct page *page, *next;
691 list_for_each_entry_safe(page, next, &b->refused_pages, lru) {
692 list_del(&page->lru);
694 STATS_INC(b->stats.refused_free);
697 b->n_refused_pages = 0;
700 static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
705 static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
708 vmballoon_batch_set_pa(b->batch_page, idx,
709 (u64)page_to_pfn(p) << PAGE_SHIFT);
713 * Inflate the balloon towards its target size. Note that we try to limit
714 * the rate of allocation to make sure we are not choking the rest of the
717 static void vmballoon_inflate(struct vmballoon *b)
720 unsigned int allocations = 0;
721 unsigned int num_pages = 0;
723 gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
725 pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
728 * First try NOSLEEP page allocations to inflate balloon.
730 * If we do not throttle nosleep allocations, we can drain all
731 * free pages in the guest quickly (if the balloon target is high).
732 * As a side-effect, draining free pages helps to inform (force)
733 * the guest to start swapping if balloon target is not met yet,
734 * which is a desired behavior. However, balloon driver can consume
735 * all available CPU cycles if too many pages are allocated in a
736 * second. Therefore, we throttle nosleep allocations even when
737 * the guest is not under memory pressure. OTOH, if we have already
738 * predicted that the guest is under memory pressure, then we
739 * slowdown page allocations considerably.
743 * Start with no sleep allocation rate which may be higher
744 * than sleeping allocation rate.
746 rate = b->slow_allocation_cycles ?
747 b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX;
749 pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n",
750 __func__, b->target - b->size, rate, b->rate_alloc);
752 while (b->size < b->target && num_pages < b->target - b->size) {
755 if (flags == VMW_PAGE_ALLOC_NOSLEEP)
756 STATS_INC(b->stats.alloc);
758 STATS_INC(b->stats.sleep_alloc);
760 page = alloc_page(flags);
762 if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
764 * CANSLEEP page allocation failed, so guest
765 * is under severe memory pressure. Quickly
766 * decrease allocation rate.
768 b->rate_alloc = max(b->rate_alloc / 2,
769 VMW_BALLOON_RATE_ALLOC_MIN);
770 STATS_INC(b->stats.sleep_alloc_fail);
773 STATS_INC(b->stats.alloc_fail);
776 * NOSLEEP page allocation failed, so the guest is
777 * under memory pressure. Let us slow down page
778 * allocations for next few cycles so that the guest
779 * gets out of memory pressure. Also, if we already
780 * allocated b->rate_alloc pages, let's pause,
781 * otherwise switch to sleeping allocations.
783 b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;
785 if (allocations >= b->rate_alloc)
788 flags = VMW_PAGE_ALLOC_CANSLEEP;
789 /* Lower rate for sleeping allocations. */
790 rate = b->rate_alloc;
794 b->ops->add_page(b, num_pages++, page);
795 if (num_pages == b->batch_max_pages) {
796 error = b->ops->lock(b, num_pages, &b->target);
802 if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) {
807 if (allocations >= rate) {
808 /* We allocated enough pages, let's take a break. */
814 b->ops->lock(b, num_pages, &b->target);
817 * We reached our goal without failures so try increasing
820 if (error == 0 && allocations >= b->rate_alloc) {
821 unsigned int mult = allocations / b->rate_alloc;
824 min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
825 VMW_BALLOON_RATE_ALLOC_MAX);
828 vmballoon_release_refused_pages(b);
832 * Decrease the size of the balloon allowing guest to use more memory.
834 static void vmballoon_deflate(struct vmballoon *b)
836 struct page *page, *next;
838 unsigned int num_pages = 0;
841 pr_debug("%s - size: %d, target %d, rate: %d\n", __func__, b->size,
842 b->target, b->rate_free);
844 /* free pages to reach target */
845 list_for_each_entry_safe(page, next, &b->pages, lru) {
846 list_del(&page->lru);
847 b->ops->add_page(b, num_pages++, page);
849 if (num_pages == b->batch_max_pages) {
850 error = b->ops->unlock(b, num_pages, &b->target);
853 /* quickly decrease rate in case of error */
854 b->rate_free = max(b->rate_free / 2,
855 VMW_BALLOON_RATE_FREE_MIN);
860 if (++i >= b->size - b->target)
865 b->ops->unlock(b, num_pages, &b->target);
867 /* slowly increase rate if there were no errors */
869 b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC,
870 VMW_BALLOON_RATE_FREE_MAX);
873 static const struct vmballoon_ops vmballoon_basic_ops = {
874 .add_page = vmballoon_add_page,
875 .lock = vmballoon_lock_page,
876 .unlock = vmballoon_unlock_page
879 static const struct vmballoon_ops vmballoon_batched_ops = {
880 .add_page = vmballoon_add_batched_page,
881 .lock = vmballoon_lock_batched_page,
882 .unlock = vmballoon_unlock_batched_page
885 static bool vmballoon_init_batching(struct vmballoon *b)
887 b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
891 b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
892 if (!b->batch_page) {
893 __free_page(b->page);
901 * Perform standard reset sequence by popping the balloon (in case it
902 * is not empty) and then restarting protocol. This operation normally
903 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
905 static void vmballoon_reset(struct vmballoon *b)
907 /* free all pages, skipping monitor unlock */
910 if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
913 if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
914 b->ops = &vmballoon_batched_ops;
915 b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
916 if (!vmballoon_init_batching(b)) {
918 * We failed to initialize batching, inform the monitor
919 * about it by sending a null capability.
921 * The guest will retry in one second.
923 vmballoon_send_start(b, 0);
926 } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
927 b->ops = &vmballoon_basic_ops;
928 b->batch_max_pages = 1;
931 b->reset_required = false;
932 if (!vmballoon_send_guest_id(b))
933 pr_err("failed to send guest ID to the host\n");
937 * Balloon work function: reset protocol, if needed, get the new size and
938 * adjust balloon as needed. Repeat in 1 sec.
940 static void vmballoon_work(struct work_struct *work)
942 struct delayed_work *dwork = to_delayed_work(work);
943 struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
946 STATS_INC(b->stats.timer);
948 if (b->reset_required)
951 if (b->slow_allocation_cycles > 0)
952 b->slow_allocation_cycles--;
954 if (vmballoon_send_get_target(b, &target)) {
955 /* update target, adjust size */
958 if (b->size < target)
959 vmballoon_inflate(b);
960 else if (b->size > target)
961 vmballoon_deflate(b);
965 * We are using a freezable workqueue so that balloon operations are
966 * stopped while the system transitions to/from sleep/hibernation.
968 queue_delayed_work(system_freezable_wq,
969 dwork, round_jiffies_relative(HZ));
975 #ifdef CONFIG_DEBUG_FS
977 static int vmballoon_debug_show(struct seq_file *f, void *offset)
979 struct vmballoon *b = f->private;
980 struct vmballoon_stats *stats = &b->stats;
982 /* format size info */
984 "target: %8d pages\n"
985 "current: %8d pages\n",
988 /* format rate info */
990 "rateNoSleepAlloc: %8d pages/sec\n"
991 "rateSleepAlloc: %8d pages/sec\n"
992 "rateFree: %8d pages/sec\n",
993 VMW_BALLOON_NOSLEEP_ALLOC_MAX,
994 b->rate_alloc, b->rate_free);
999 "start: %8u (%4u failed)\n"
1000 "guestType: %8u (%4u failed)\n"
1001 "lock: %8u (%4u failed)\n"
1002 "unlock: %8u (%4u failed)\n"
1003 "target: %8u (%4u failed)\n"
1004 "primNoSleepAlloc: %8u (%4u failed)\n"
1005 "primCanSleepAlloc: %8u (%4u failed)\n"
1010 stats->start, stats->start_fail,
1011 stats->guest_type, stats->guest_type_fail,
1012 stats->lock, stats->lock_fail,
1013 stats->unlock, stats->unlock_fail,
1014 stats->target, stats->target_fail,
1015 stats->alloc, stats->alloc_fail,
1016 stats->sleep_alloc, stats->sleep_alloc_fail,
1018 stats->refused_alloc, stats->refused_free);
1023 static int vmballoon_debug_open(struct inode *inode, struct file *file)
1025 return single_open(file, vmballoon_debug_show, inode->i_private);
1028 static const struct file_operations vmballoon_debug_fops = {
1029 .owner = THIS_MODULE,
1030 .open = vmballoon_debug_open,
1032 .llseek = seq_lseek,
1033 .release = single_release,
1036 static int __init vmballoon_debugfs_init(struct vmballoon *b)
1040 b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
1041 &vmballoon_debug_fops);
1042 if (IS_ERR(b->dbg_entry)) {
1043 error = PTR_ERR(b->dbg_entry);
1044 pr_err("failed to create debugfs entry, error: %d\n", error);
1051 static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
1053 debugfs_remove(b->dbg_entry);
1058 static inline int vmballoon_debugfs_init(struct vmballoon *b)
1063 static inline void vmballoon_debugfs_exit(struct vmballoon *b)
1067 #endif /* CONFIG_DEBUG_FS */
1069 static int __init vmballoon_init(void)
1074 * Check if we are running on VMware's hypervisor and bail out
1077 if (x86_hyper != &x86_hyper_vmware)
1080 INIT_LIST_HEAD(&balloon.pages);
1081 INIT_LIST_HEAD(&balloon.refused_pages);
1083 /* initialize rates */
1084 balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;
1085 balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX;
1087 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
1092 if (!vmballoon_send_start(&balloon, VMW_BALLOON_CAPABILITIES)) {
1093 pr_err("failed to send start command to the host\n");
1097 if ((balloon.capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
1098 balloon.ops = &vmballoon_batched_ops;
1099 balloon.batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
1100 if (!vmballoon_init_batching(&balloon)) {
1101 pr_err("failed to init batching\n");
1104 } else if ((balloon.capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
1105 balloon.ops = &vmballoon_basic_ops;
1106 balloon.batch_max_pages = 1;
1109 if (!vmballoon_send_guest_id(&balloon)) {
1110 pr_err("failed to send guest ID to the host\n");
1114 error = vmballoon_debugfs_init(&balloon);
1118 queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
1122 module_init(vmballoon_init);
1124 static void __exit vmballoon_exit(void)
1126 cancel_delayed_work_sync(&balloon.dwork);
1128 vmballoon_debugfs_exit(&balloon);
1131 * Deallocate all reserved memory, and reset connection with monitor.
1132 * Reset connection before deallocating memory to avoid potential for
1133 * additional spurious resets from guest touching deallocated pages.
1135 vmballoon_send_start(&balloon, VMW_BALLOON_CAPABILITIES);
1136 vmballoon_pop(&balloon);
1138 module_exit(vmballoon_exit);