usb: dwc_otg_310: support vbus controlled by both gpio and pmic
[firefly-linux-kernel-4.4.55.git] / drivers / hv / hv_balloon.c
index 4c605c70ebf9ed858d014d94383ebfab86851b1f..b853b4b083bde2c402ee4dbacdd25736b4273e58 100644 (file)
@@ -19,6 +19,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
+#include <linux/jiffies.h>
 #include <linux/mman.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 enum { /* dynamic-memory protocol versions, built with DYNMEM_MAKE_VERSION() */
        DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
        DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
+       DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0),
 
        DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1, /* WIN* names are aliases of the numbered versions above */
        DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2,
+       DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3,
 
-       DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN8
+       DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10 /* CURRENT now aliases WIN10 instead of WIN8 */
 };
 
 
@@ -427,14 +430,13 @@ struct dm_info_msg {
  * currently hot added. We hot add in multiples of 128M
  * chunks; it is possible that we may not be able to bring
  * online all the pages in the region. The range
- * covered_start_pfn : covered_end_pfn defines the pages that can
+ * covered_end_pfn defines the pages that can
  * be brought online.
  */
 
 struct hv_hotadd_state {
        struct list_head list;
        unsigned long start_pfn;
-       unsigned long covered_start_pfn;
        unsigned long covered_end_pfn;
        unsigned long ha_end_pfn;
        unsigned long end_pfn;
@@ -459,6 +461,11 @@ static bool do_hot_add;
  */
 static uint pressure_report_delay = 45;
 
+/*
+ * The last time we posted a pressure report to host.
+ */
+static unsigned long last_post_time;
+
 module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
 MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
 
@@ -497,6 +504,8 @@ struct hv_dynmem_device {
         * Number of pages we have currently ballooned out.
         */
        unsigned int num_pages_ballooned;
+       unsigned int num_pages_onlined;
+       unsigned int num_pages_added;
 
        /*
         * State to manage the ballooning (up) operation.
@@ -527,6 +536,8 @@ struct hv_dynmem_device {
         */
        struct task_struct *thread;
 
+       struct mutex ha_region_mutex;
+
        /*
         * A list of hot-add regions.
         */
@@ -542,7 +553,48 @@ struct hv_dynmem_device {
 
 static struct hv_dynmem_device dm_device;
 
+static void post_status(struct hv_dynmem_device *dm);
+
 #ifdef CONFIG_MEMORY_HOTPLUG
+static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
+                             void *v)
+{ /* Notifier callback: val selects the MEM_* event handled below; always returns NOTIFY_OK. */
+       struct memory_notify *mem = (struct memory_notify *)v; /* only mem->nr_pages is read here */
+
+       switch (val) {
+       case MEM_GOING_ONLINE: /* lock is taken here and released by MEM_ONLINE / MEM_CANCEL_ONLINE */
+               mutex_lock(&dm_device.ha_region_mutex);
+               break;
+
+       case MEM_ONLINE:
+               dm_device.num_pages_onlined += mem->nr_pages; /* fall through: shares the unlock and wake-up path */
+       case MEM_CANCEL_ONLINE:
+               if (val == MEM_ONLINE ||
+                   mutex_is_locked(&dm_device.ha_region_mutex)) /* NOTE(review): mutex_is_locked() does not say who holds the lock - confirm this cannot release another task's hold */
+                       mutex_unlock(&dm_device.ha_region_mutex);
+               if (dm_device.ha_waiting) { /* clear the flag and signal ol_waitevent */
+                       dm_device.ha_waiting = false;
+                       complete(&dm_device.ol_waitevent);
+               }
+               break;
+
+       case MEM_OFFLINE: /* subtract the offlined pages from the onlined count, under the lock */
+               mutex_lock(&dm_device.ha_region_mutex);
+               dm_device.num_pages_onlined -= mem->nr_pages;
+               mutex_unlock(&dm_device.ha_region_mutex);
+               break;
+       case MEM_GOING_OFFLINE:
+       case MEM_CANCEL_OFFLINE: /* no bookkeeping for these two events */
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block hv_memory_nb = { /* registered in balloon_probe(), unregistered in balloon_remove() */
+       .notifier_call = hv_memory_notifier,
+       .priority = 0
+};
+
 
 static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
 {
@@ -562,7 +614,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
                                struct hv_hotadd_state *has)
 {
        int ret = 0;
-       int i, nid, t;
+       int i, nid;
        unsigned long start_pfn;
        unsigned long processed_pfn;
        unsigned long total_pfn = pfn_count;
@@ -584,6 +636,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
                init_completion(&dm_device.ol_waitevent);
                dm_device.ha_waiting = true;
 
+               mutex_unlock(&dm_device.ha_region_mutex);
                nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
                ret = add_memory(nid, PFN_PHYS((start_pfn)),
                                (HA_CHUNK << PAGE_SHIFT));
@@ -602,20 +655,19 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
                        }
                        has->ha_end_pfn -= HA_CHUNK;
                        has->covered_end_pfn -=  processed_pfn;
+                       mutex_lock(&dm_device.ha_region_mutex);
                        break;
                }
 
                /*
                 * Wait for the memory block to be onlined.
+                * Since the hot add has succeeded, it is ok to
+                * proceed even if the pages in the hot added region
+                * have not been "onlined" within the allowed time.
                 */
-               t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
-               if (t == 0) {
-                       pr_info("hot_add memory timedout\n");
-                       has->ha_end_pfn -= HA_CHUNK;
-                       has->covered_end_pfn -=  processed_pfn;
-                       break;
-               }
-
+               wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
+               mutex_lock(&dm_device.ha_region_mutex);
+               post_status(&dm_device);
        }
 
        return;
@@ -628,15 +680,9 @@ static void hv_online_page(struct page *pg)
        unsigned long cur_start_pgp;
        unsigned long cur_end_pgp;
 
-       if (dm_device.ha_waiting) {
-               dm_device.ha_waiting = false;
-               complete(&dm_device.ol_waitevent);
-       }
-
        list_for_each(cur, &dm_device.ha_region_list) {
                has = list_entry(cur, struct hv_hotadd_state, list);
-               cur_start_pgp = (unsigned long)
-                               pfn_to_page(has->covered_start_pfn);
+               cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn);
                cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
 
                if (((unsigned long)pg >= cur_start_pgp) &&
@@ -648,7 +694,6 @@ static void hv_online_page(struct page *pg)
                        __online_page_set_limits(pg);
                        __online_page_increment_counters(pg);
                        __online_page_free(pg);
-                       has->covered_start_pfn++;
                }
        }
 }
@@ -692,10 +737,9 @@ static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
                 * is, update it.
                 */
 
-               if (has->covered_end_pfn != start_pfn) {
+               if (has->covered_end_pfn != start_pfn)
                        has->covered_end_pfn = start_pfn;
-                       has->covered_start_pfn = start_pfn;
-               }
+
                return true;
 
        }
@@ -738,9 +782,18 @@ static unsigned long handle_pg_range(unsigned long pg_start,
                        pgs_ol = has->ha_end_pfn - start_pfn;
                        if (pgs_ol > pfn_cnt)
                                pgs_ol = pfn_cnt;
-                       hv_bring_pgs_online(start_pfn, pgs_ol);
+
+                       /*
+                        * Check if the corresponding memory block is already
+                        * online by checking its last previously backed page.
+                        * In case it is we need to bring rest (which was not
+                        * backed previously) online too.
+                        */
+                       if (start_pfn > has->start_pfn &&
+                           !PageReserved(pfn_to_page(start_pfn - 1)))
+                               hv_bring_pgs_online(start_pfn, pgs_ol);
+
                        has->covered_end_pfn +=  pgs_ol;
-                       has->covered_start_pfn +=  pgs_ol;
                        pfn_cnt -= pgs_ol;
                }
 
@@ -801,7 +854,6 @@ static unsigned long process_hot_add(unsigned long pg_start,
                list_add_tail(&ha_region->list, &dm_device.ha_region_list);
                ha_region->start_pfn = rg_start;
                ha_region->ha_end_pfn = rg_start;
-               ha_region->covered_start_pfn = pg_start;
                ha_region->covered_end_pfn = pg_start;
                ha_region->end_pfn = rg_start + rg_size;
        }
@@ -828,9 +880,9 @@ static void hot_add_req(struct work_struct *dummy)
        memset(&resp, 0, sizeof(struct dm_hot_add_response));
        resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
        resp.hdr.size = sizeof(struct dm_hot_add_response);
-       resp.hdr.trans_id = atomic_inc_return(&trans_id);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+       mutex_lock(&dm_device.ha_region_mutex);
        pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
        pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
 
@@ -862,6 +914,9 @@ static void hot_add_req(struct work_struct *dummy)
        if (do_hot_add)
                resp.page_count = process_hot_add(pg_start, pfn_cnt,
                                                rg_start, rg_sz);
+
+       dm->num_pages_added += resp.page_count;
+       mutex_unlock(&dm_device.ha_region_mutex);
 #endif
        /*
         * The result field of the response structure has the
@@ -890,6 +945,7 @@ static void hot_add_req(struct work_struct *dummy)
                pr_info("Memory hot add failed\n");
 
        dm->state = DM_INITIALIZED;
+       resp.hdr.trans_id = atomic_inc_return(&trans_id);
        vmbus_sendpacket(dm->dev->channel, &resp,
                        sizeof(struct dm_hot_add_response),
                        (unsigned long)NULL,
@@ -924,9 +980,8 @@ static unsigned long compute_balloon_floor(void)
         *     128        72    (1/2)
         *     512       168    (1/4)
         *    2048       360    (1/8)
-        *    8192       552    (1/32)
-        *   32768      1320
-        *  131072      4392
+        *    8192       744    (1/16)
+        *   32768      1512    (1/32)
         */
        if (totalram_pages < MB2PAGES(128))
                min_pages = MB2PAGES(8) + (totalram_pages >> 1);
@@ -934,8 +989,10 @@ static unsigned long compute_balloon_floor(void)
                min_pages = MB2PAGES(40) + (totalram_pages >> 2);
        else if (totalram_pages < MB2PAGES(2048))
                min_pages = MB2PAGES(104) + (totalram_pages >> 3);
+       else if (totalram_pages < MB2PAGES(8192))
+               min_pages = MB2PAGES(232) + (totalram_pages >> 4);
        else
-               min_pages = MB2PAGES(296) + (totalram_pages >> 5);
+               min_pages = MB2PAGES(488) + (totalram_pages >> 5);
 #undef MB2PAGES
        return min_pages;
 }
@@ -954,11 +1011,17 @@ static void post_status(struct hv_dynmem_device *dm)
 {
        struct dm_status status;
        struct sysinfo val;
+       unsigned long now = jiffies;
+       unsigned long last_post = last_post_time;
 
        if (pressure_report_delay > 0) {
                --pressure_report_delay;
                return;
        }
+
+       if (!time_after(now, (last_post_time + HZ)))
+               return;
+
        si_meminfo(&val);
        memset(&status, 0, sizeof(struct dm_status));
        status.hdr.type = DM_STATUS_REPORT;
@@ -966,18 +1029,38 @@ static void post_status(struct hv_dynmem_device *dm)
        status.hdr.trans_id = atomic_inc_return(&trans_id);
 
        /*
-        * The host expects the guest to report free memory.
-        * Further, the host expects the pressure information to
-        * include the ballooned out pages.
-        * For a given amount of memory that we are managing, we
-        * need to compute a floor below which we should not balloon.
-        * Compute this and add it to the pressure report.
+        * The host expects the guest to report free and committed memory.
+        * Furthermore, the host expects the pressure information to include
+        * the ballooned out pages. For a given amount of memory that we are
+        * managing we need to compute a floor below which we should not
+        * balloon. Compute this and add it to the pressure report.
+        * We also need to report all offline pages (num_pages_added -
+        * num_pages_onlined) as committed to the host, otherwise it can try
+        * asking us to balloon them out.
         */
        status.num_avail = val.freeram;
        status.num_committed = vm_memory_committed() +
-                               dm->num_pages_ballooned +
-                               compute_balloon_floor();
+               dm->num_pages_ballooned +
+               (dm->num_pages_added > dm->num_pages_onlined ?
+                dm->num_pages_added - dm->num_pages_onlined : 0) +
+               compute_balloon_floor();
+
+       /*
+        * If our transaction ID is no longer current, just don't
+        * send the status. This can happen if we were interrupted
+        * after we picked our transaction ID.
+        */
+       if (status.hdr.trans_id != atomic_read(&trans_id))
+               return;
+
+       /*
+        * If the last post time that we sampled has changed,
+        * we have raced, don't post the status.
+        */
+       if (last_post != last_post_time)
+               return;
 
+       last_post_time = jiffies;
        vmbus_sendpacket(dm->dev->channel, &status,
                                sizeof(struct dm_status),
                                (unsigned long)NULL,
@@ -1002,11 +1085,12 @@ static void free_balloon_pages(struct hv_dynmem_device *dm,
 
 
 
-static int  alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
-                        struct dm_balloon_response *bl_resp, int alloc_unit,
-                        bool *alloc_error)
+static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
+                                       unsigned int num_pages,
+                                       struct dm_balloon_response *bl_resp,
+                                       int alloc_unit)
 {
-       int i = 0;
+       unsigned int i = 0;
        struct page *pg;
 
        if (num_pages < alloc_unit)
@@ -1025,11 +1109,8 @@ static int  alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
                                __GFP_NOMEMALLOC | __GFP_NOWARN,
                                get_order(alloc_unit << PAGE_SHIFT));
 
-               if (!pg) {
-                       *alloc_error = true;
+               if (!pg)
                        return i * alloc_unit;
-               }
-
 
                dm->num_pages_ballooned += alloc_unit;
 
@@ -1056,15 +1137,18 @@ static int  alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
 
 static void balloon_up(struct work_struct *dummy)
 {
-       int num_pages = dm_device.balloon_wrk.num_pages;
-       int num_ballooned = 0;
+       unsigned int num_pages = dm_device.balloon_wrk.num_pages;
+       unsigned int num_ballooned = 0;
        struct dm_balloon_response *bl_resp;
        int alloc_unit;
        int ret;
-       bool alloc_error = false;
        bool done = false;
        int i;
+       struct sysinfo val;
+       unsigned long floor;
 
+       /* The host balloons pages in 2M granularity. */
+       WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0);
 
        /*
         * We will attempt 2M allocations. However, if we fail to
@@ -1072,26 +1156,33 @@ static void balloon_up(struct work_struct *dummy)
         */
        alloc_unit = 512;
 
+       si_meminfo(&val);
+       floor = compute_balloon_floor();
+
+       /* Refuse to balloon below the floor, keep the 2M granularity. */
+       if (val.freeram < num_pages || val.freeram - num_pages < floor) {
+               num_pages = val.freeram > floor ? (val.freeram - floor) : 0;
+               num_pages -= num_pages % PAGES_IN_2M;
+       }
+
        while (!done) {
                bl_resp = (struct dm_balloon_response *)send_buffer;
                memset(send_buffer, 0, PAGE_SIZE);
                bl_resp->hdr.type = DM_BALLOON_RESPONSE;
-               bl_resp->hdr.trans_id = atomic_inc_return(&trans_id);
                bl_resp->hdr.size = sizeof(struct dm_balloon_response);
                bl_resp->more_pages = 1;
 
 
                num_pages -= num_ballooned;
                num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
-                                               bl_resp, alloc_unit,
-                                                &alloc_error);
+                                                   bl_resp, alloc_unit);
 
-               if ((alloc_error) && (alloc_unit != 1)) {
+               if (alloc_unit != 1 && num_ballooned == 0) {
                        alloc_unit = 1;
                        continue;
                }
 
-               if ((alloc_error) || (num_ballooned == num_pages)) {
+               if (num_ballooned == 0 || num_ballooned == num_pages) {
                        bl_resp->more_pages = 0;
                        done = true;
                        dm_device.state = DM_INITIALIZED;
@@ -1104,6 +1195,7 @@ static void balloon_up(struct work_struct *dummy)
                 */
 
                do {
+                       bl_resp->hdr.trans_id = atomic_inc_return(&trans_id);
                        ret = vmbus_sendpacket(dm_device.dev->channel,
                                                bl_resp,
                                                bl_resp->hdr.size,
@@ -1112,7 +1204,7 @@ static void balloon_up(struct work_struct *dummy)
 
                        if (ret == -EAGAIN)
                                msleep(20);
-
+                       post_status(&dm_device);
                } while (ret == -EAGAIN);
 
                if (ret) {
@@ -1139,8 +1231,10 @@ static void balloon_down(struct hv_dynmem_device *dm,
        struct dm_unballoon_response resp;
        int i;
 
-       for (i = 0; i < range_count; i++)
+       for (i = 0; i < range_count; i++) {
                free_balloon_pages(dm, &range_array[i]);
+               complete(&dm_device.config_event);
+       }
 
        if (req->more_pages == 1)
                return;
@@ -1163,18 +1257,16 @@ static void balloon_onchannelcallback(void *context);
 static int dm_thread_func(void *dm_dev)
 {
        struct hv_dynmem_device *dm = dm_dev;
-       int t;
 
        while (!kthread_should_stop()) {
-               t = wait_for_completion_timeout(&dm_device.config_event, 1*HZ);
+               wait_for_completion_interruptible_timeout(
+                                               &dm_device.config_event, 1*HZ);
                /*
                 * The host expects us to post information on the memory
                 * pressure every second.
                 */
-
-               if (t == 0)
-                       post_status(dm);
-
+               reinit_completion(&dm_device.config_event);
+               post_status(dm);
        }
 
        return 0;
@@ -1206,13 +1298,25 @@ static void version_resp(struct hv_dynmem_device *dm,
        if (dm->next_version == 0)
                goto version_error;
 
-       dm->next_version = 0;
        memset(&version_req, 0, sizeof(struct dm_version_request));
        version_req.hdr.type = DM_VERSION_REQUEST;
        version_req.hdr.size = sizeof(struct dm_version_request);
        version_req.hdr.trans_id = atomic_inc_return(&trans_id);
-       version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN7;
-       version_req.is_last_attempt = 1;
+       version_req.version.version = dm->next_version;
+
+       /*
+        * Set the next version to try in case current version fails.
+        * Win7 protocol ought to be the last one to try.
+        */
+       switch (version_req.version.version) {
+       case DYNMEM_PROTOCOL_VERSION_WIN8:
+               dm->next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
+               version_req.is_last_attempt = 0;
+               break;
+       default:
+               dm->next_version = 0;
+               version_req.is_last_attempt = 1;
+       }
 
        ret = vmbus_sendpacket(dm->dev->channel, &version_req,
                                sizeof(struct dm_version_request),
@@ -1329,7 +1433,8 @@ static void balloon_onchannelcallback(void *context)
 static int balloon_probe(struct hv_device *dev,
                        const struct hv_vmbus_device_id *dev_id)
 {
-       int ret, t;
+       int ret;
+       unsigned long t;
        struct dm_version_request version_req;
        struct dm_capabilities cap_msg;
 
@@ -1351,10 +1456,11 @@ static int balloon_probe(struct hv_device *dev,
 
        dm_device.dev = dev;
        dm_device.state = DM_INITIALIZING;
-       dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
+       dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
        init_completion(&dm_device.host_event);
        init_completion(&dm_device.config_event);
        INIT_LIST_HEAD(&dm_device.ha_region_list);
+       mutex_init(&dm_device.ha_region_mutex);
        INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
        INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
        dm_device.host_specified_ha_region = false;
@@ -1368,6 +1474,7 @@ static int balloon_probe(struct hv_device *dev,
 
 #ifdef CONFIG_MEMORY_HOTPLUG
        set_online_page_callback(&hv_online_page);
+       register_memory_notifier(&hv_memory_nb);
 #endif
 
        hv_set_drvdata(dev, &dm_device);
@@ -1381,7 +1488,7 @@ static int balloon_probe(struct hv_device *dev,
        version_req.hdr.type = DM_VERSION_REQUEST;
        version_req.hdr.size = sizeof(struct dm_version_request);
        version_req.hdr.trans_id = atomic_inc_return(&trans_id);
-       version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN8;
+       version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN10;
        version_req.is_last_attempt = 0;
 
        ret = vmbus_sendpacket(dev->channel, &version_req,
@@ -1486,6 +1593,7 @@ static int balloon_remove(struct hv_device *dev)
        kfree(send_buffer);
 #ifdef CONFIG_MEMORY_HOTPLUG
        restore_online_page_callback(&hv_online_page);
+       unregister_memory_notifier(&hv_memory_nb);
 #endif
        list_for_each_safe(cur, tmp, &dm->ha_region_list) {
                has = list_entry(cur, struct hv_hotadd_state, list);
@@ -1521,5 +1629,4 @@ static int __init init_balloon_drv(void)
 module_init(init_balloon_drv);
 
 MODULE_DESCRIPTION("Hyper-V Balloon");
-MODULE_VERSION(HV_DRV_VERSION);
 MODULE_LICENSE("GPL");