* How the whole thing works (courtesy of Christoffer Dall):
*
* - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- * something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
- * bitmap (this bitmap is updated by both user land ioctls and guest
- * mmio ops, and other in-kernel peripherals such as the
- * arch. timers) and indicate the 'wire' state.
+ * something is pending on the CPU interface.
+ * - Interrupts that are pending on the distributor are stored on the
+ * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
+ * ioctls and guest mmio ops, and other in-kernel peripherals such as the
+ * arch. timers).
* - Every time the bitmap changes, the irq_pending_on_cpu oracle is
* recalculated
* - To calculate the oracle, we need info for each cpu from
* compute_pending_for_cpu, which considers:
- * - PPI: dist->irq_state & dist->irq_enable
- * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
- * - irq_spi_target is a 'formatted' version of the GICD_ICFGR
+ * - PPI: dist->irq_pending & dist->irq_enable
+ * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
+ * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
* registers, stored on each vcpu. We only keep one bit of
* information per interrupt, making sure that only one vcpu can
* accept the interrupt.
+ * - If any of the above state changes, we must recalculate the oracle.
* - The same is true when injecting an interrupt, except that we only
* consider a single interrupt at a time. The irq_spi_cpu array
* contains the target CPU for each SPI.
* the 'line' again. This is achieved as such:
*
* - When a level interrupt is moved onto a vcpu, the corresponding
- * bit in irq_active is set. As long as this bit is set, the line
+ * bit in irq_queued is set. As long as this bit is set, the line
* will be ignored for further interrupts. The interrupt is injected
* into the vcpu with the GICH_LR_EOI bit set (generate a
* maintenance interrupt on EOI).
* - When the interrupt is EOIed, the maintenance interrupt fires,
- * and clears the corresponding bit in irq_active. This allow the
+ * and clears the corresponding bit in irq_queued. This allows the
* interrupt line to be sampled again.
+ * - Note that level-triggered interrupts can also be set to pending from
+ * writes to GICD_ISPENDRn and lowering the external input line does not
+ * cause the interrupt to become inactive in such a situation.
+ * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
+ * inactive as long as the external input line is held high.
*/
#define VGIC_ADDR_UNDEF (-1)
return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
}
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+ return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
}
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
}
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
}
static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
+ return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
}
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
}
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
}
static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
vcpu->arch.vgic_cpu.pending_shared);
}
+static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
+{
+ return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+}
+
static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
{
return le32_to_cpu(*((u32 *)mmio->data)) & mask;
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
+ u32 *reg, orig;
+ u32 level_mask;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
+ level_mask = (~(*reg));
+
+ /* Mark both level and edge triggered irqs as pending */
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
vgic_reg_access(mmio, reg, offset,
ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
if (mmio->is_write) {
+ /* Set the soft-pending flag only for level-triggered irqs */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+ *reg &= level_mask;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
vgic_update_state(vcpu->kvm);
return true;
}
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
+ u32 *level_active;
+ u32 *reg, orig;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
vgic_reg_access(mmio, reg, offset,
ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
if (mmio->is_write) {
+ /* Re-set level triggered level-active interrupts */
+ level_active = vgic_bitmap_get_reg(&dist->irq_level,
+ vcpu->vcpu_id, offset);
+ reg = vgic_bitmap_get_reg(&dist->irq_pending,
+ vcpu->vcpu_id, offset);
+ *reg |= *level_active;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
+ /* Clear soft-pending flags */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
vgic_update_state(vcpu->kvm);
return true;
}
* is fine, then we are only setting a few bits that were
* already set.
*/
- vgic_dist_irq_set(vcpu, lr.irq);
+ vgic_dist_irq_set_pending(vcpu, lr.irq);
if (lr.irq < VGIC_NR_SGIS)
dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
lr.state &= ~LR_STATE_PENDING;
* active), then the LR does not hold any useful info and can
* be marked as free for other use.
*/
- if (!(lr.state & LR_STATE_MASK))
+ if (!(lr.state & LR_STATE_MASK)) {
vgic_retire_lr(i, lr.irq, vcpu);
+ vgic_irq_clear_queued(vcpu, lr.irq);
+ }
/* Finally update the VGIC state. */
vgic_update_state(vcpu->kvm);
kvm_for_each_vcpu(c, vcpu, kvm) {
if (target_cpus & 1) {
/* Flag the SGI as pending */
- vgic_dist_irq_set(vcpu, sgi);
+ vgic_dist_irq_set_pending(vcpu, sgi);
dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
}
pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
pend_shared = vcpu->arch.vgic_cpu.pending_shared;
- pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
+ pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
- pending = vgic_bitmap_get_shared_map(&dist->irq_state);
+ pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
bitmap_and(pend_shared, pend_shared,
if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
vgic_retire_lr(lr, vlr.irq, vcpu);
- if (vgic_irq_is_active(vcpu, vlr.irq))
- vgic_irq_clear_active(vcpu, vlr.irq);
+ if (vgic_irq_is_queued(vcpu, vlr.irq))
+ vgic_irq_clear_queued(vcpu, vlr.irq);
}
}
}
* our emulated gic and can get rid of them.
*/
if (!sources) {
- vgic_dist_irq_clear(vcpu, irq);
+ vgic_dist_irq_clear_pending(vcpu, irq);
vgic_cpu_irq_clear(vcpu, irq);
return true;
}
static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
{
- if (vgic_irq_is_active(vcpu, irq))
+ if (!vgic_can_sample_irq(vcpu, irq))
return true; /* level interrupt, already queued */
if (vgic_queue_irq(vcpu, 0, irq)) {
if (vgic_irq_is_edge(vcpu, irq)) {
- vgic_dist_irq_clear(vcpu, irq);
+ vgic_dist_irq_clear_pending(vcpu, irq);
vgic_cpu_irq_clear(vcpu, irq);
} else {
- vgic_irq_set_active(vcpu, irq);
+ vgic_irq_set_queued(vcpu, irq);
}
return true;
for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+ WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
- vgic_irq_clear_active(vcpu, vlr.irq);
+ vgic_irq_clear_queued(vcpu, vlr.irq);
WARN_ON(vlr.state & LR_STATE_MASK);
vlr.state = 0;
vgic_set_lr(vcpu, lr, vlr);
+ /*
+ * If the IRQ was EOIed it was also ACKed and we we
+ * therefore assume we can clear the soft pending
+ * state (should it had been set) for this interrupt.
+ *
+ * Note: if the IRQ soft pending state was set after
+ * the IRQ was acked, it actually shouldn't be
+ * cleared, but we have no way of knowing that unless
+ * we start trapping ACKs when the soft-pending state
+ * is set.
+ */
+ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
/* Any additional pending interrupt? */
- if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) {
+ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
vgic_cpu_irq_set(vcpu, vlr.irq);
level_pending = true;
} else {
+ vgic_dist_irq_clear_pending(vcpu, vlr.irq);
vgic_cpu_irq_clear(vcpu, vlr.irq);
}
static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
{
- int is_edge = vgic_irq_is_edge(vcpu, irq);
- int state = vgic_dist_irq_is_pending(vcpu, irq);
+ int edge_triggered = vgic_irq_is_edge(vcpu, irq);
/*
* Only inject an interrupt if:
* - edge triggered and we have a rising edge
* - level triggered and we change level
*/
- if (is_edge)
+ if (edge_triggered) {
+ int state = vgic_dist_irq_is_pending(vcpu, irq);
return level > state;
- else
+ } else {
+ int state = vgic_dist_irq_get_level(vcpu, irq);
return level != state;
+ }
}
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
+static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
unsigned int irq_num, bool level)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int is_edge, is_level;
+ int edge_triggered, level_triggered;
int enabled;
bool ret = true;
spin_lock(&dist->lock);
vcpu = kvm_get_vcpu(kvm, cpuid);
- is_edge = vgic_irq_is_edge(vcpu, irq_num);
- is_level = !is_edge;
+ edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
+ level_triggered = !edge_triggered;
if (!vgic_validate_injection(vcpu, irq_num, level)) {
ret = false;
kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
- if (level)
- vgic_dist_irq_set(vcpu, irq_num);
- else
- vgic_dist_irq_clear(vcpu, irq_num);
+ if (level) {
+ if (level_triggered)
+ vgic_dist_irq_set_level(vcpu, irq_num);
+ vgic_dist_irq_set_pending(vcpu, irq_num);
+ } else {
+ if (level_triggered) {
+ vgic_dist_irq_clear_level(vcpu, irq_num);
+ if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ } else {
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ }
+ }
enabled = vgic_irq_is_enabled(vcpu, irq_num);
goto out;
}
- if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
+ if (!vgic_can_sample_irq(vcpu, irq_num)) {
/*
* Level interrupt in progress, will be picked up
* when EOId.
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
bool level)
{
- if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
+ if (likely(vgic_initialized(kvm)) &&
+ vgic_update_irq_pending(kvm, cpuid, irq_num, level))
vgic_kick_vcpus(kvm);
return 0;
return 0;
}
-static void vgic_init_maintenance_interrupt(void *info)
-{
- enable_percpu_irq(vgic->maint_irq, 0);
-}
-
-static int vgic_cpu_notify(struct notifier_block *self,
- unsigned long action, void *cpu)
-{
- switch (action) {
- case CPU_STARTING:
- case CPU_STARTING_FROZEN:
- vgic_init_maintenance_interrupt(NULL);
- break;
- case CPU_DYING:
- case CPU_DYING_FROZEN:
- disable_percpu_irq(vgic->maint_irq);
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block vgic_cpu_nb = {
- .notifier_call = vgic_cpu_notify,
-};
-
-static const struct of_device_id vgic_ids[] = {
- { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
- { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
- {},
-};
-
-int kvm_vgic_hyp_init(void)
-{
- const struct of_device_id *matched_id;
- int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
- const struct vgic_params **);
- struct device_node *vgic_node;
- int ret;
-
- vgic_node = of_find_matching_node_and_match(NULL,
- vgic_ids, &matched_id);
- if (!vgic_node) {
- kvm_err("error: no compatible GIC node found\n");
- return -ENODEV;
- }
-
- vgic_probe = matched_id->data;
- ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
- if (ret)
- return ret;
-
- ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
- "vgic", kvm_get_running_vcpus());
- if (ret) {
- kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
- return ret;
- }
-
- ret = __register_cpu_notifier(&vgic_cpu_nb);
- if (ret) {
- kvm_err("Cannot register vgic CPU notifier\n");
- goto out_free_irq;
- }
-
- /* Callback into for arch code for setup */
- vgic_arch_setup(vgic);
-
- on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
- return 0;
-
-out_free_irq:
- free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
- return ret;
-}
-
/**
* kvm_vgic_init - Initialize global VGIC state before running any VCPUs
* @kvm: pointer to the kvm struct
return ret;
}
-static bool vgic_ioaddr_overlap(struct kvm *kvm)
+static int vgic_ioaddr_overlap(struct kvm *kvm)
{
phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
return kvm_vgic_create(dev->kvm);
}
-struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
.name = "kvm-arm-vgic",
.create = vgic_create,
.destroy = vgic_destroy,
.get_attr = vgic_get_attr,
.has_attr = vgic_has_attr,
};
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+ enable_percpu_irq(vgic->maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *cpu)
+{
+ switch (action) {
+ case CPU_STARTING:
+ case CPU_STARTING_FROZEN:
+ vgic_init_maintenance_interrupt(NULL);
+ break;
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
+ disable_percpu_irq(vgic->maint_irq);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+ .notifier_call = vgic_cpu_notify,
+};
+
+static const struct of_device_id vgic_ids[] = {
+ { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+ { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+ {},
+};
+
+int kvm_vgic_hyp_init(void)
+{
+ const struct of_device_id *matched_id;
+ const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+ const struct vgic_params **);
+ struct device_node *vgic_node;
+ int ret;
+
+ vgic_node = of_find_matching_node_and_match(NULL,
+ vgic_ids, &matched_id);
+ if (!vgic_node) {
+ kvm_err("error: no compatible GIC node found\n");
+ return -ENODEV;
+ }
+
+ vgic_probe = matched_id->data;
+ ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+ if (ret)
+ return ret;
+
+ ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
+ "vgic", kvm_get_running_vcpus());
+ if (ret) {
+ kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+ return ret;
+ }
+
+ ret = __register_cpu_notifier(&vgic_cpu_nb);
+ if (ret) {
+ kvm_err("Cannot register vgic CPU notifier\n");
+ goto out_free_irq;
+ }
+
+ /* Callback into for arch code for setup */
+ vgic_arch_setup(vgic);
+
+ on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+ return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+ KVM_DEV_TYPE_ARM_VGIC_V2);
+
+out_free_irq:
+ free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
+ return ret;
+}