Merge branch 'bugfix' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 10 Dec 2009 17:35:02 +0000 (09:35 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 10 Dec 2009 17:35:02 +0000 (09:35 -0800)
* 'bugfix' of git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen:
  xen: try harder to balloon up under memory pressure.
  Xen balloon: fix totalram_pages counting.
  xen: explicitly create/destroy stop_machine workqueues outside suspend/resume region.
  xen: improve error handling in do_suspend.
  xen: don't leak IRQs over suspend/resume.
  xen: call clock resume notifier on all CPUs
  xen: use iret for return from 64b kernel to 32b usermode
  xen: don't call dpm_resume_noirq() with interrupts disabled.
  xen: register runstate info for boot CPU early
  xen: register runstate on secondary CPUs
  xen: register timer interrupt with IRQF_TIMER
  xen: correctly restore pfn_to_mfn_list_list after resume
  xen: restore runstate_info even if !have_vcpu_info_placement
  xen: re-register runstate area earlier on resume.
  xen: wait up to 5 minutes for device connection
  xen: improvement to wait_for_devices()
  xen: fix is_disconnected_device/exists_disconnected_device
  xen/xenbus: make DEVICE_ATTR()s static

arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c
arch/x86/xen/smp.c
arch/x86/xen/suspend.c
arch/x86/xen/time.c
arch/x86/xen/xen-asm_64.S
arch/x86/xen/xen-ops.h
drivers/xen/balloon.c
drivers/xen/events.c
drivers/xen/manage.c
drivers/xen/xenbus/xenbus_probe.c

index c462cea8ef095f10d5a3c70877c43edc45954406..b8e45f164e2acc360bcc5c380a864a635761f134 100644 (file)
@@ -138,24 +138,23 @@ static void xen_vcpu_setup(int cpu)
  */
 void xen_vcpu_restore(void)
 {
-       if (have_vcpu_info_placement) {
-               int cpu;
+       int cpu;
 
-               for_each_online_cpu(cpu) {
-                       bool other_cpu = (cpu != smp_processor_id());
+       for_each_online_cpu(cpu) {
+               bool other_cpu = (cpu != smp_processor_id());
 
-                       if (other_cpu &&
-                           HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
-                               BUG();
+               if (other_cpu &&
+                   HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+                       BUG();
 
-                       xen_vcpu_setup(cpu);
+               xen_setup_runstate_info(cpu);
 
-                       if (other_cpu &&
-                           HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
-                               BUG();
-               }
+               if (have_vcpu_info_placement)
+                       xen_vcpu_setup(cpu);
 
-               BUG_ON(!have_vcpu_info_placement);
+               if (other_cpu &&
+                   HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+                       BUG();
        }
 }
 
@@ -1180,6 +1179,8 @@ asmlinkage void __init xen_start_kernel(void)
 
        xen_raw_console_write("about to get started...\n");
 
+       xen_setup_runstate_info(0);
+
        /* Start the world */
 #ifdef CONFIG_X86_32
        i386_start_kernel();
index 3bf7b1d250ce986d02bc44de0bd2086f6e37b824..bf4cd6bfe959f1037431668c71a263050098f35a 100644 (file)
@@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn)
 }
 
 /* Build the parallel p2m_top_mfn structures */
-static void __init xen_build_mfn_list_list(void)
+void xen_build_mfn_list_list(void)
 {
        unsigned pfn, idx;
 
index 738da0cb0d8b0622d1fba7decf51f54692a32da7..64757c0ba5fc2255a4d9188b3a68636832a73a1f 100644 (file)
@@ -295,6 +295,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
                (unsigned long)task_stack_page(idle) -
                KERNEL_STACK_OFFSET + THREAD_SIZE;
 #endif
+       xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
        xen_init_lock_cpu(cpu);
 
index 95be7b434724c5067214dc416367633c522a69bb..987267f79bf5154648cb729cedf29e712fa58ab3 100644 (file)
@@ -1,4 +1,5 @@
 #include <linux/types.h>
+#include <linux/clockchips.h>
 
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
@@ -27,6 +28,8 @@ void xen_pre_suspend(void)
 
 void xen_post_suspend(int suspend_cancelled)
 {
+       xen_build_mfn_list_list();
+
        xen_setup_shared_info();
 
        if (suspend_cancelled) {
@@ -44,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled)
 
 }
 
+static void xen_vcpu_notify_restore(void *data)
+{
+       unsigned long reason = (unsigned long)data;
+
+       /* Boot processor notified via generic timekeeping_resume() */
+       if ( smp_processor_id() == 0)
+               return;
+
+       clockevents_notify(reason, NULL);
+}
+
 void xen_arch_resume(void)
 {
-       /* nothing */
+       smp_call_function(xen_vcpu_notify_restore,
+                              (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
 }
index 0a5aa44299a51fac214833fe00c68d947ee92367..9d1f853120d859cfc814e0f2eaa2e01b3f1575e8 100644 (file)
@@ -100,7 +100,7 @@ bool xen_vcpu_stolen(int vcpu)
        return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
 }
 
-static void setup_runstate_info(int cpu)
+void xen_setup_runstate_info(int cpu)
 {
        struct vcpu_register_runstate_memory_area area;
 
@@ -434,7 +434,7 @@ void xen_setup_timer(int cpu)
                name = "<timer kasprintf failed>";
 
        irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
-                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
                                      name, NULL);
 
        evt = &per_cpu(xen_clock_events, cpu);
@@ -442,8 +442,6 @@ void xen_setup_timer(int cpu)
 
        evt->cpumask = cpumask_of(cpu);
        evt->irq = irq;
-
-       setup_runstate_info(cpu);
 }
 
 void xen_teardown_timer(int cpu)
@@ -494,6 +492,7 @@ __init void xen_time_init(void)
 
        setup_force_cpu_cap(X86_FEATURE_TSC);
 
+       xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
        xen_setup_cpu_clockevents();
 }
index 02f496a8dbaa250d40f843456b8ac10fc183fe14..53adefda4275330a810b6d883b6ad8b58a72730c 100644 (file)
@@ -96,7 +96,7 @@ ENTRY(xen_sysret32)
        pushq $__USER32_CS
        pushq %rcx
 
-       pushq $VGCF_in_syscall
+       pushq $0
 1:     jmp hypercall_iret
 ENDPATCH(xen_sysret32)
 RELOC(xen_sysret32, 1b+1)
@@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target)
 ENTRY(xen_sysenter_target)
        lea 16(%rsp), %rsp      /* strip %rcx, %r11 */
        mov $-ENOSYS, %rax
-       pushq $VGCF_in_syscall
+       pushq $0
        jmp hypercall_iret
 ENDPROC(xen_syscall32_target)
 ENDPROC(xen_sysenter_target)
index 355fa6b99c9c402e80d8443d2b94647436062c7a..f9153a300bcee998e1f78f95a202cc9036df79ca 100644 (file)
@@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
 
 void xen_setup_mfn_list_list(void);
 void xen_setup_shared_info(void);
+void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
 pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_ident_map_ISA(void);
@@ -41,6 +42,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
+void xen_setup_runstate_info(int cpu);
 void xen_teardown_timer(int cpu);
 cycle_t xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
index d31505b6f7a465fd965257210da53466cc83e34e..4204336135849de16ffd5a9d9d010d5ed5231433 100644 (file)
@@ -66,8 +66,6 @@ struct balloon_stats {
        /* We aim for 'current allocation' == 'target allocation'. */
        unsigned long current_pages;
        unsigned long target_pages;
-       /* We may hit the hard limit in Xen. If we do then we remember it. */
-       unsigned long hard_limit;
        /*
         * Drivers may alter the memory reservation independently, but they
         * must inform the balloon driver so we avoid hitting the hard limit.
@@ -136,6 +134,8 @@ static void balloon_append(struct page *page)
                list_add(&page->lru, &ballooned_pages);
                balloon_stats.balloon_low++;
        }
+
+       totalram_pages--;
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -156,6 +156,8 @@ static struct page *balloon_retrieve(void)
        else
                balloon_stats.balloon_low--;
 
+       totalram_pages++;
+
        return page;
 }
 
@@ -181,7 +183,7 @@ static void balloon_alarm(unsigned long unused)
 
 static unsigned long current_target(void)
 {
-       unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
+       unsigned long target = balloon_stats.target_pages;
 
        target = min(target,
                     balloon_stats.current_pages +
@@ -217,23 +219,10 @@ static int increase_reservation(unsigned long nr_pages)
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = nr_pages;
        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
-       if (rc < nr_pages) {
-               if (rc > 0) {
-                       int ret;
-
-                       /* We hit the Xen hard limit: reprobe. */
-                       reservation.nr_extents = rc;
-                       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-                                                  &reservation);
-                       BUG_ON(ret != rc);
-               }
-               if (rc >= 0)
-                       balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
-                                                   balloon_stats.driver_pages);
+       if (rc < 0)
                goto out;
-       }
 
-       for (i = 0; i < nr_pages; i++) {
+       for (i = 0; i < rc; i++) {
                page = balloon_retrieve();
                BUG_ON(page == NULL);
 
@@ -259,13 +248,12 @@ static int increase_reservation(unsigned long nr_pages)
                __free_page(page);
        }
 
-       balloon_stats.current_pages += nr_pages;
-       totalram_pages = balloon_stats.current_pages;
+       balloon_stats.current_pages += rc;
 
  out:
        spin_unlock_irqrestore(&balloon_lock, flags);
 
-       return 0;
+       return rc < 0 ? rc : rc != nr_pages;
 }
 
 static int decrease_reservation(unsigned long nr_pages)
@@ -323,7 +311,6 @@ static int decrease_reservation(unsigned long nr_pages)
        BUG_ON(ret != nr_pages);
 
        balloon_stats.current_pages -= nr_pages;
-       totalram_pages = balloon_stats.current_pages;
 
        spin_unlock_irqrestore(&balloon_lock, flags);
 
@@ -367,7 +354,6 @@ static void balloon_process(struct work_struct *work)
 static void balloon_set_new_target(unsigned long target)
 {
        /* No need for lock. Not read-modify-write updates. */
-       balloon_stats.hard_limit   = ~0UL;
        balloon_stats.target_pages = target;
        schedule_work(&balloon_worker);
 }
@@ -422,12 +408,10 @@ static int __init balloon_init(void)
        pr_info("xen_balloon: Initialising balloon driver.\n");
 
        balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
-       totalram_pages   = balloon_stats.current_pages;
        balloon_stats.target_pages  = balloon_stats.current_pages;
        balloon_stats.balloon_low   = 0;
        balloon_stats.balloon_high  = 0;
        balloon_stats.driver_pages  = 0UL;
-       balloon_stats.hard_limit    = ~0UL;
 
        init_timer(&balloon_timer);
        balloon_timer.data = 0;
@@ -472,9 +456,6 @@ module_exit(balloon_exit);
 BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
 BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
 BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
-BALLOON_SHOW(hard_limit_kb,
-            (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n",
-            (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
 BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
 
 static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
@@ -544,7 +525,6 @@ static struct attribute *balloon_info_attrs[] = {
        &attr_current_kb.attr,
        &attr_low_kb.attr,
        &attr_high_kb.attr,
-       &attr_hard_limit_kb.attr,
        &attr_driver_kb.attr,
        NULL
 };
index 2f57276e87a2f65cd8c468529484e3ca21f88c8b..ce602dd09bc18c60381c68f0f1310bb168817bed 100644 (file)
@@ -474,6 +474,9 @@ static void unbind_from_irq(unsigned int irq)
                bind_evtchn_to_cpu(evtchn, 0);
 
                evtchn_to_irq[evtchn] = -1;
+       }
+
+       if (irq_info[irq].type != IRQT_UNBOUND) {
                irq_info[irq] = mk_unbound_info();
 
                dynamic_irq_cleanup(irq);
index 10d03d7931c47236a42e2e7ac158ac7564308354..c4997930afc71a741a429df77b7e98f7b5d58872 100644 (file)
@@ -43,7 +43,6 @@ static int xen_suspend(void *data)
        if (err) {
                printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
                        err);
-               dpm_resume_noirq(PMSG_RESUME);
                return err;
        }
 
@@ -69,7 +68,6 @@ static int xen_suspend(void *data)
        }
 
        sysdev_resume();
-       dpm_resume_noirq(PMSG_RESUME);
 
        return 0;
 }
@@ -81,6 +79,12 @@ static void do_suspend(void)
 
        shutting_down = SHUTDOWN_SUSPEND;
 
+       err = stop_machine_create();
+       if (err) {
+               printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
+               goto out;
+       }
+
 #ifdef CONFIG_PREEMPT
        /* If the kernel is preemptible, we need to freeze all the processes
           to prevent them from being in the middle of a pagetable update
@@ -88,29 +92,32 @@ static void do_suspend(void)
        err = freeze_processes();
        if (err) {
                printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
-               return;
+               goto out_destroy_sm;
        }
 #endif
 
        err = dpm_suspend_start(PMSG_SUSPEND);
        if (err) {
                printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err);
-               goto out;
+               goto out_thaw;
        }
 
-       printk(KERN_DEBUG "suspending xenstore...\n");
-       xs_suspend();
-
        err = dpm_suspend_noirq(PMSG_SUSPEND);
        if (err) {
                printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
-               goto resume_devices;
+               goto out_resume;
        }
 
+       printk(KERN_DEBUG "suspending xenstore...\n");
+       xs_suspend();
+
        err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
+
+       dpm_resume_noirq(PMSG_RESUME);
+
        if (err) {
                printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
-               goto out;
+               cancelled = 1;
        }
 
        if (!cancelled) {
@@ -119,17 +126,21 @@ static void do_suspend(void)
        } else
                xs_suspend_cancel();
 
-       dpm_resume_noirq(PMSG_RESUME);
-
-resume_devices:
+out_resume:
        dpm_resume_end(PMSG_RESUME);
 
        /* Make sure timer events get retriggered on all CPUs */
        clock_was_set();
-out:
+
+out_thaw:
 #ifdef CONFIG_PREEMPT
        thaw_processes();
+
+out_destroy_sm:
 #endif
+       stop_machine_destroy();
+
+out:
        shutting_down = SHUTDOWN_INVALID;
 }
 #endif /* CONFIG_PM_SLEEP */
index d42e25d5968dcf48acd5448a7a6af56d21abd5fb..649fcdf114b7aab926cfbe4c3c348441d0c58ee0 100644 (file)
@@ -454,21 +454,21 @@ static ssize_t xendev_show_nodename(struct device *dev,
 {
        return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
 }
-DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
+static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
 
 static ssize_t xendev_show_devtype(struct device *dev,
                                   struct device_attribute *attr, char *buf)
 {
        return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
 }
-DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
+static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
 
 static ssize_t xendev_show_modalias(struct device *dev,
                                    struct device_attribute *attr, char *buf)
 {
        return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
 }
-DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
+static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
 
 int xenbus_probe_node(struct xen_bus_type *bus,
                      const char *type,
@@ -843,7 +843,7 @@ postcore_initcall(xenbus_probe_init);
 
 MODULE_LICENSE("GPL");
 
-static int is_disconnected_device(struct device *dev, void *data)
+static int is_device_connecting(struct device *dev, void *data)
 {
        struct xenbus_device *xendev = to_xenbus_device(dev);
        struct device_driver *drv = data;
@@ -861,14 +861,15 @@ static int is_disconnected_device(struct device *dev, void *data)
                return 0;
 
        xendrv = to_xenbus_driver(dev->driver);
-       return (xendev->state != XenbusStateConnected ||
-               (xendrv->is_ready && !xendrv->is_ready(xendev)));
+       return (xendev->state < XenbusStateConnected ||
+               (xendev->state == XenbusStateConnected &&
+                xendrv->is_ready && !xendrv->is_ready(xendev)));
 }
 
-static int exists_disconnected_device(struct device_driver *drv)
+static int exists_connecting_device(struct device_driver *drv)
 {
        return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
-                               is_disconnected_device);
+                               is_device_connecting);
 }
 
 static int print_device_status(struct device *dev, void *data)
@@ -884,10 +885,13 @@ static int print_device_status(struct device *dev, void *data)
                /* Information only: is this too noisy? */
                printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
                       xendev->nodename);
-       } else if (xendev->state != XenbusStateConnected) {
+       } else if (xendev->state < XenbusStateConnected) {
+               enum xenbus_state rstate = XenbusStateUnknown;
+               if (xendev->otherend)
+                       rstate = xenbus_read_driver_state(xendev->otherend);
                printk(KERN_WARNING "XENBUS: Timeout connecting "
-                      "to device: %s (state %d)\n",
-                      xendev->nodename, xendev->state);
+                      "to device: %s (local state %d, remote state %d)\n",
+                      xendev->nodename, xendev->state, rstate);
        }
 
        return 0;
@@ -897,7 +901,7 @@ static int print_device_status(struct device *dev, void *data)
 static int ready_to_wait_for_devices;
 
 /*
- * On a 10 second timeout, wait for all devices currently configured.  We need
+ * On a 5-minute timeout, wait for all devices currently configured.  We need
  * to do this to guarantee that the filesystems and / or network devices
  * needed for boot are available, before we can allow the boot to proceed.
  *
@@ -912,18 +916,30 @@ static int ready_to_wait_for_devices;
  */
 static void wait_for_devices(struct xenbus_driver *xendrv)
 {
-       unsigned long timeout = jiffies + 10*HZ;
+       unsigned long start = jiffies;
        struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
+       unsigned int seconds_waited = 0;
 
        if (!ready_to_wait_for_devices || !xen_domain())
                return;
 
-       while (exists_disconnected_device(drv)) {
-               if (time_after(jiffies, timeout))
-                       break;
+       while (exists_connecting_device(drv)) {
+               if (time_after(jiffies, start + (seconds_waited+5)*HZ)) {
+                       if (!seconds_waited)
+                               printk(KERN_WARNING "XENBUS: Waiting for "
+                                      "devices to initialise: ");
+                       seconds_waited += 5;
+                       printk("%us...", 300 - seconds_waited);
+                       if (seconds_waited == 300)
+                               break;
+               }
+
                schedule_timeout_interruptible(HZ/10);
        }
 
+       if (seconds_waited)
+               printk("\n");
+
        bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
                         print_device_status);
 }