kvm: ioapic: conditionally delay irq delivery duringeoi broadcast
authorZhang Haoyu <zhanghy@sangfor.com>
Thu, 11 Sep 2014 08:47:04 +0000 (16:47 +0800)
committerPaolo Bonzini <pbonzini@redhat.com>
Tue, 16 Sep 2014 12:44:48 +0000 (14:44 +0200)
Currently, we call ioapic_service() immediately when we find the irq is still
active during eoi broadcast. But for real hardware, there's some delay between
the EOI writing and irq delivery.  If we do not emulate this behavior, and
re-inject the interrupt immediately after the guest sends an EOI and re-enables
interrupts, a guest might spend all its time in the ISR if it has a broken
handler for a level-triggered interrupt.

Such livelock actually happens with Windows guests when resuming from
hibernation.

As there's no way to recognize the broken handle from new raised ones, this patch
delays an interrupt if 10.000 consecutive EOIs found that the interrupt was
still high.  The guest can then make a little forward progress, until a proper
IRQ handler is set or until some detection routine in the guest (such as
Linux's note_interrupt()) recognizes the situation.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Zhang Haoyu <zhanghy@sangfor.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
include/trace/events/kvm.h
virt/kvm/ioapic.c
virt/kvm/ioapic.h

index 908925ace77661a2e056fa5ddd9ac52df12a0b19..ab679c395042247ac13b0837051325aab96c55af 100644 (file)
@@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq,
                  __entry->coalesced ? " (coalesced)" : "")
 );
 
+TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
+           TP_PROTO(__u64 e),
+           TP_ARGS(e),
+
+       TP_STRUCT__entry(
+               __field(        __u64,          e               )
+       ),
+
+       TP_fast_assign(
+               __entry->e              = e;
+       ),
+
+       TP_printk("dst %x vec=%u (%s|%s|%s%s)",
+                 (u8)(__entry->e >> 56), (u8)__entry->e,
+                 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+                 (__entry->e & (1<<11)) ? "logical" : "physical",
+                 (__entry->e & (1<<15)) ? "level" : "edge",
+                 (__entry->e & (1<<16)) ? "|masked" : "")
+);
+
 TRACE_EVENT(kvm_msi_set_irq,
            TP_PROTO(__u64 address, __u64 data),
            TP_ARGS(address, data),
index e8ce34c9db32cd0d2aa15016ea5c847cae6af2b6..0ba4057d271befe7f166f52f66cbd2cdaf9f4ea5 100644 (file)
@@ -405,6 +405,26 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
        spin_unlock(&ioapic->lock);
 }
 
+static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
+{
+       int i;
+       struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
+                                                eoi_inject.work);
+       spin_lock(&ioapic->lock);
+       for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+               union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+               if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
+                       continue;
+
+               if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
+                       ioapic_service(ioapic, i, false);
+       }
+       spin_unlock(&ioapic->lock);
+}
+
+#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
+
 static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
                        struct kvm_ioapic *ioapic, int vector, int trigger_mode)
 {
@@ -435,8 +455,26 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 
                ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
                ent->fields.remote_irr = 0;
-               if (ioapic->irr & (1 << i))
-                       ioapic_service(ioapic, i, false);
+               if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
+                       ++ioapic->irq_eoi[i];
+                       if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
+                               /*
+                                * Real hardware does not deliver the interrupt
+                                * immediately during eoi broadcast, and this
+                                * lets a buggy guest make slow progress
+                                * even if it does not correctly handle a
+                                * level-triggered interrupt.  Emulate this
+                                * behavior if we detect an interrupt storm.
+                                */
+                               schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
+                               ioapic->irq_eoi[i] = 0;
+                               trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
+                       } else {
+                               ioapic_service(ioapic, i, false);
+                       }
+               } else {
+                       ioapic->irq_eoi[i] = 0;
+               }
        }
 }
 
@@ -565,12 +603,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 {
        int i;
 
+       cancel_delayed_work_sync(&ioapic->eoi_inject);
        for (i = 0; i < IOAPIC_NUM_PINS; i++)
                ioapic->redirtbl[i].fields.mask = 1;
        ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
        ioapic->ioregsel = 0;
        ioapic->irr = 0;
        ioapic->id = 0;
+       memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
        rtc_irq_eoi_tracking_reset(ioapic);
        update_handled_vectors(ioapic);
 }
@@ -589,6 +629,7 @@ int kvm_ioapic_init(struct kvm *kvm)
        if (!ioapic)
                return -ENOMEM;
        spin_lock_init(&ioapic->lock);
+       INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
        kvm->arch.vioapic = ioapic;
        kvm_ioapic_reset(ioapic);
        kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -609,6 +650,7 @@ void kvm_ioapic_destroy(struct kvm *kvm)
 {
        struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 
+       cancel_delayed_work_sync(&ioapic->eoi_inject);
        if (ioapic) {
                kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
                kvm->arch.vioapic = NULL;
index 90d43e95dcf85151f8543b8faeb80419f844ba0b..e23b70634f1ea88488525a6e9c7a61af0638de01 100644 (file)
@@ -59,6 +59,8 @@ struct kvm_ioapic {
        spinlock_t lock;
        DECLARE_BITMAP(handled_vectors, 256);
        struct rtc_status rtc_status;
+       struct delayed_work eoi_inject;
+       u32 irq_eoi[IOAPIC_NUM_PINS];
 };
 
 #ifdef DEBUG