select GENERIC_CMOS_UPDATE
select GENERIC_TIME_VSYSCALL_OLD
select GENERIC_CLOCKEVENTS
+ select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select HAVE_MOD_ARCH_SPECIFIC
#define OPAL_FLASH_VALIDATE 76
#define OPAL_FLASH_MANAGE 77
#define OPAL_FLASH_UPDATE 78
+#define OPAL_RESYNC_TIMEBASE 79
#define OPAL_DUMP_INIT 81
#define OPAL_DUMP_INFO 82
#define OPAL_DUMP_READ 83
extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
extern void opal_shutdown(void);
+extern int opal_resync_timebase(void);
extern void opal_lpc_init(void);
extern int powersave_nap; /* set if nap mode can be used in idle loop */
extern void power7_nap(void);
+extern void power7_sleep(void);
extern void flush_instruction_cache(void);
extern void hard_reset_now(void);
extern void poweroff_now(void);
* in /proc/interrupts will be wrong!!! --Troy */
#define PPC_MSG_CALL_FUNCTION 0
#define PPC_MSG_RESCHEDULE 1
-#define PPC_MSG_CALL_FUNC_SINGLE 2
+#define PPC_MSG_TICK_BROADCAST 2
#define PPC_MSG_DEBUGGER_BREAK 3
/* for irq controllers that have dedicated ipis per message (4) */
struct rtc_time;
extern void to_tm(int tim, struct rtc_time * tm);
extern void GregorianDay(struct rtc_time *tm);
+extern void tick_broadcast_ipi_handler(void);
extern void generic_calibrate_decr(void);
cmpwi cr1,r13,2
/* Total loss of HV state is fatal, we could try to use the
* PIR to locate a PACA, then use an emergency stack etc...
- * but for now, let's just stay stuck here
+ * OPAL v3 based powernv platforms have new idle states
+ * which fall in this catagory.
*/
- bgt cr1,.
+ bgt cr1,8f
GET_PACA(r13)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
beq cr1,2f
b .power7_wakeup_noloss
2: b .power7_wakeup_loss
+
+ /* Fast Sleep wakeup on PowerNV */
+8: GET_PACA(r13)
+ b .power7_wakeup_tb_loss
+
9:
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif /* CONFIG_PPC_P7_NAP */
#include <asm/ppc-opcode.h>
#include <asm/hw_irq.h>
#include <asm/kvm_book3s_asm.h>
+#include <asm/opal.h>
#undef DEBUG
- .text
+/* Idle state entry routines */
-_GLOBAL(power7_idle)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
- /* fall through */
+#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+1: cmp cr0,r0,r0; \
+ bne 1b; \
+ IDLE_INST; \
+ b .
-_GLOBAL(power7_nap)
+ .text
+
+/*
+ * Pass requested state in r3:
+ * 0 - nap
+ * 1 - sleep
+ */
+_GLOBAL(power7_powersave_common)
+ /* Use r3 to pass state nap/sleep/winkle */
/* NAP is a state loss, we create a regs frame on the
* stack, fill it up with the state we care about and
* stick a pointer to it in PACAR1. We really only
/* Continue saving state */
SAVE_GPR(2, r1)
SAVE_NVGPRS(r1)
- mfcr r3
- std r3,_CCR(r1)
+ mfcr r4
+ std r4,_CCR(r1)
std r9,_MSR(r1)
std r1,PACAR1(r13)
li r4,KVM_HWTHREAD_IN_NAP
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
+ cmpwi cr0,r3,1
+ beq 2f
+ IDLE_STATE_ENTER_SEQ(PPC_NAP)
+ /* No return */
+2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+ /* No return */
- /* Magic NAP mode enter sequence */
- std r0,0(r1)
- ptesync
- ld r0,0(r1)
-1: cmp cr0,r0,r0
- bne 1b
- PPC_NAP
- b .
+_GLOBAL(power7_idle)
+ /* Now check if user or arch enabled NAP mode */
+ LOAD_REG_ADDRBASE(r3,powersave_nap)
+ lwz r4,ADDROFF(powersave_nap)(r3)
+ cmpwi 0,r4,0
+ beqlr
+ /* fall through */
+
+_GLOBAL(power7_nap)
+ li r3,0
+ b power7_powersave_common
+ /* No return */
+
+_GLOBAL(power7_sleep)
+ li r3,1
+ b power7_powersave_common
+ /* No return */
+
+_GLOBAL(power7_wakeup_tb_loss)
+ ld r2,PACATOC(r13);
+ ld r1,PACAR1(r13)
+
+ /* Time base re-sync */
+ li r0,OPAL_RESYNC_TIMEBASE
+ LOAD_REG_ADDR(r11,opal);
+ ld r12,8(r11);
+ ld r2,0(r11);
+ mtctr r12
+ bctrl
+
+ /* TODO: Check r3 for failure */
+
+ REST_NVGPRS(r1)
+ REST_GPR(2, r1)
+ ld r3,_CCR(r1)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtcr r3
+ mfspr r3,SPRN_SRR1 /* Return SRR1 */
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
_GLOBAL(power7_wakeup_loss)
ld r1,PACAR1(r13)
#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/irq.h>
+#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
return IRQ_HANDLED;
}
-static irqreturn_t call_function_single_action(int irq, void *data)
+static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
{
- generic_smp_call_function_single_interrupt();
+ tick_broadcast_ipi_handler();
return IRQ_HANDLED;
}
static irq_handler_t smp_ipi_action[] = {
[PPC_MSG_CALL_FUNCTION] = call_function_action,
[PPC_MSG_RESCHEDULE] = reschedule_action,
- [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+ [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
};
const char *smp_ipi_name[] = {
[PPC_MSG_CALL_FUNCTION] = "ipi call function",
[PPC_MSG_RESCHEDULE] = "ipi reschedule",
- [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+ [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
};
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
scheduler_ipi();
- if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNC_SINGLE))
- generic_smp_call_function_single_interrupt();
+ if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
+ tick_broadcast_ipi_handler();
if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
debug_ipi_action(0, NULL);
} while (info->messages);
void arch_send_call_function_single_ipi(int cpu)
{
- do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+ do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void tick_broadcast(const struct cpumask *mask)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask)
+ do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
+}
+#endif
+
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
void smp_send_debugger_break(void)
{
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
+#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
.irq = 0,
.set_next_event = decrementer_set_next_event,
.set_mode = decrementer_set_mode,
- .features = CLOCK_EVT_FEAT_ONESHOT,
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP,
};
EXPORT_SYMBOL(decrementer_clockevent);
#endif /* CONFIG_IRQ_WORK */
+void __timer_interrupt(void)
+{
+ struct pt_regs *regs = get_irq_regs();
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+ struct clock_event_device *evt = &__get_cpu_var(decrementers);
+ u64 now;
+
+ trace_timer_interrupt_entry(regs);
+
+ if (test_irq_work_pending()) {
+ clear_irq_work_pending();
+ irq_work_run();
+ }
+
+ now = get_tb_or_rtc();
+ if (now >= *next_tb) {
+ *next_tb = ~(u64)0;
+ if (evt->event_handler)
+ evt->event_handler(evt);
+ __get_cpu_var(irq_stat).timer_irqs_event++;
+ } else {
+ now = *next_tb - now;
+ if (now <= DECREMENTER_MAX)
+ set_dec((int)now);
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
+ __get_cpu_var(irq_stat).timer_irqs_others++;
+ }
+
+#ifdef CONFIG_PPC64
+ /* collect purr register values often, for accurate calculations */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
+ cu->current_tb = mfspr(SPRN_PURR);
+ }
+#endif
+
+ trace_timer_interrupt_exit(regs);
+}
+
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
{
struct pt_regs *old_regs;
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
- struct clock_event_device *evt = &__get_cpu_var(decrementers);
- u64 now;
/* Ensure a positive value is written to the decrementer, or else
* some CPUs will continue to take decrementer exceptions.
old_regs = set_irq_regs(regs);
irq_enter();
- trace_timer_interrupt_entry(regs);
-
- if (test_irq_work_pending()) {
- clear_irq_work_pending();
- irq_work_run();
- }
-
- now = get_tb_or_rtc();
- if (now >= *next_tb) {
- *next_tb = ~(u64)0;
- if (evt->event_handler)
- evt->event_handler(evt);
- __get_cpu_var(irq_stat).timer_irqs_event++;
- } else {
- now = *next_tb - now;
- if (now <= DECREMENTER_MAX)
- set_dec((int)now);
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
- __get_cpu_var(irq_stat).timer_irqs_others++;
- }
-
-#ifdef CONFIG_PPC64
- /* collect purr register values often, for accurate calculations */
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
- cu->current_tb = mfspr(SPRN_PURR);
- }
-#endif
-
- trace_timer_interrupt_exit(regs);
-
+ __timer_interrupt();
irq_exit();
set_irq_regs(old_regs);
}
decrementer_set_next_event(DECREMENTER_MAX, dev);
}
+/* Interrupt handler for the timer broadcast IPI */
+void tick_broadcast_ipi_handler(void)
+{
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+ *next_tb = get_tb_or_rtc();
+ __timer_interrupt();
+}
+
static void register_decrementer_clockevent(int cpu)
{
struct clock_event_device *dec = &per_cpu(decrementers, cpu);
clocksource_init();
init_decrementer_clockevent();
+ tick_setup_hrtimer_broadcast();
}
{
iic_request_ipi(PPC_MSG_CALL_FUNCTION);
iic_request_ipi(PPC_MSG_RESCHEDULE);
- iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE);
+ iic_request_ipi(PPC_MSG_TICK_BROADCAST);
iic_request_ipi(PPC_MSG_DEBUGGER_BREAK);
}
OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
+OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2);
BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0);
BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1);
- BUILD_BUG_ON(PPC_MSG_CALL_FUNC_SINGLE != 2);
+ BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST != 2);
BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK != 3);
for (i = 0; i < MSG_COUNT; i++) {
#include <linux/cpuidle.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
+#include <linux/clockchips.h>
+#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/runlatch.h>
+/* Flags and constants used in PowerNV platform */
+
+#define MAX_POWERNV_IDLE_STATES 8
+#define IDLE_USE_INST_NAP 0x00010000 /* Use nap instruction */
+#define IDLE_USE_INST_SLEEP 0x00020000 /* Use sleep instruction */
+
struct cpuidle_driver powernv_idle_driver = {
.name = "powernv_idle",
.owner = THIS_MODULE,
return index;
}
+static int fastsleep_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned long old_lpcr = mfspr(SPRN_LPCR);
+ unsigned long new_lpcr;
+
+ if (unlikely(system_state < SYSTEM_RUNNING))
+ return index;
+
+ new_lpcr = old_lpcr;
+ new_lpcr &= ~(LPCR_MER | LPCR_PECE); /* lpcr[mer] must be 0 */
+
+ /* exit powersave upon external interrupt, but not decrementer
+ * interrupt.
+ */
+ new_lpcr |= LPCR_PECE0;
+
+ mtspr(SPRN_LPCR, new_lpcr);
+ power7_sleep();
+
+ mtspr(SPRN_LPCR, old_lpcr);
+
+ return index;
+}
+
/*
* States for dedicated partition case.
*/
-static struct cpuidle_state powernv_states[] = {
+static struct cpuidle_state powernv_states[MAX_POWERNV_IDLE_STATES] = {
{ /* Snooze */
.name = "snooze",
.desc = "snooze",
.exit_latency = 0,
.target_residency = 0,
.enter = &snooze_loop },
- { /* NAP */
- .name = "NAP",
- .desc = "NAP",
- .flags = CPUIDLE_FLAG_TIME_VALID,
- .exit_latency = 10,
- .target_residency = 100,
- .enter = &nap_loop },
};
static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
return 0;
}
+static int powernv_add_idle_states(void)
+{
+ struct device_node *power_mgt;
+ struct property *prop;
+ int nr_idle_states = 1; /* Snooze */
+ int dt_idle_states;
+ u32 *flags;
+ int i;
+
+ /* Currently we have snooze statically defined */
+
+ power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+ if (!power_mgt) {
+ pr_warn("opal: PowerMgmt Node not found\n");
+ return nr_idle_states;
+ }
+
+ prop = of_find_property(power_mgt, "ibm,cpu-idle-state-flags", NULL);
+ if (!prop) {
+ pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
+ return nr_idle_states;
+ }
+
+ dt_idle_states = prop->length / sizeof(u32);
+ flags = (u32 *) prop->value;
+
+ for (i = 0; i < dt_idle_states; i++) {
+
+ if (flags[i] & IDLE_USE_INST_NAP) {
+ /* Add NAP state */
+ strcpy(powernv_states[nr_idle_states].name, "Nap");
+ strcpy(powernv_states[nr_idle_states].desc, "Nap");
+ powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID;
+ powernv_states[nr_idle_states].exit_latency = 10;
+ powernv_states[nr_idle_states].target_residency = 100;
+ powernv_states[nr_idle_states].enter = &nap_loop;
+ nr_idle_states++;
+ }
+
+ if (flags[i] & IDLE_USE_INST_SLEEP) {
+ /* Add FASTSLEEP state */
+ strcpy(powernv_states[nr_idle_states].name, "FastSleep");
+ strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
+ powernv_states[nr_idle_states].flags =
+ CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP;
+ powernv_states[nr_idle_states].exit_latency = 300;
+ powernv_states[nr_idle_states].target_residency = 1000000;
+ powernv_states[nr_idle_states].enter = &fastsleep_loop;
+ nr_idle_states++;
+ }
+ }
+
+ return nr_idle_states;
+}
+
/*
* powernv_idle_probe()
* Choose state table for shared versus dedicated partition
*/
static int powernv_idle_probe(void)
{
-
if (cpuidle_disable != IDLE_NO_OVERRIDE)
return -ENODEV;
if (firmware_has_feature(FW_FEATURE_OPALv3)) {
cpuidle_state_table = powernv_states;
- max_idle_state = ARRAY_SIZE(powernv_states);
+ /* Device tree can indicate more idle states */
+ max_idle_state = powernv_add_idle_states();
} else
return -ENODEV;