Merge branch 'lsk-v4.4-eas-v5.2' of git://git.linaro.org/arm/eas/kernel.git

[firefly-linux-kernel-4.4.55.git] / kernel / sched / sched.h
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index efd3bfc7e34722883e2f08ca82f91cffde812963..a537f1864dd08bad9b94563eeac44912da276f09 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -506,10 +506,18 @@ struct dl_rq {
  #else
         struct dl_bw dl_bw;
  #endif
+       /* This is the "average utilization" for this runqueue */
+       s64 avg_bw;
  };
  
  #ifdef CONFIG_SMP
  
+struct max_cpu_capacity {
+       raw_spinlock_t lock;    /* NOTE(review): presumably guards val and cpu; confirm */
+       unsigned long val;      /* the capacity value being tracked */
+       int cpu;                /* NOTE(review): presumably the CPU val refers to; confirm */
+};
+
  /*
   * We add the notion of a root-domain which will be used to define per-domain
   * variables. Each exclusive cpuset essentially defines an island domain by
@@ -528,6 +536,9 @@ struct root_domain {
         /* Indicate more than one runnable task for any CPU */
         bool overload;
  
+       /* Indicate one or more cpus over-utilized (tipping point) */
+       bool overutilized;
+
         /*
          * The bit corresponding to a CPU gets set here if such CPU has more
          * than one runnable -deadline task (as it is below for RT tasks).
@@ -543,6 +554,9 @@ struct root_domain {
          */
         cpumask_var_t rto_mask;
         struct cpupri cpupri;
+
+       /* Maximum cpu capacity in the system. */
+       struct max_cpu_capacity max_cpu_capacity;
  };
  
  extern struct root_domain def_root_domain;
@@ -572,6 +586,7 @@ struct rq {
         #define CPU_LOAD_IDX_MAX 5
         unsigned long cpu_load[CPU_LOAD_IDX_MAX];
         unsigned long last_load_update_tick;
+       unsigned int misfit_task;
  #ifdef CONFIG_NO_HZ_COMMON
         u64 nohz_stamp;
         unsigned long nohz_flags;
@@ -687,6 +702,7 @@ struct rq {
  #ifdef CONFIG_CPU_IDLE
         /* Must be inspected within a rcu lock section */
         struct cpuidle_state *idle_state;
+       int idle_state_idx;
  #endif
  };
  
@@ -836,6 +852,8 @@ DECLARE_PER_CPU(int, sd_llc_id);
  DECLARE_PER_CPU(struct sched_domain *, sd_numa);
  DECLARE_PER_CPU(struct sched_domain *, sd_busy);
  DECLARE_PER_CPU(struct sched_domain *, sd_asym);
+DECLARE_PER_CPU(struct sched_domain *, sd_ea);
+DECLARE_PER_CPU(struct sched_domain *, sd_scs);
  
  struct sched_group_capacity {
         atomic_t ref;
@@ -843,7 +861,8 @@ struct sched_group_capacity {
          * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
          * for a single CPU.
          */
-       unsigned int capacity;
+       unsigned long capacity;
+       unsigned long max_capacity; /* Max per-cpu capacity in group */
         unsigned long next_update;
         int imbalance; /* XXX unrelated to capacity but shared group state */
         /*
@@ -860,6 +879,7 @@ struct sched_group {
  
         unsigned int group_weight;
         struct sched_group_capacity *sgc;
+       const struct sched_group_energy *sge; /* pointee is read-only */
  
         /*
          * The CPUs this group covers.
@@ -1073,6 +1093,9 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
          * We must ensure this doesn't happen until the switch is completely
          * finished.
          *
+        * In particular, the load of prev->state in finish_task_switch() must
+        * happen before this.
+        *
          * Pairs with the control dependency and rmb in try_to_wake_up().
          */
         smp_store_release(&prev->on_cpu, 0);
@@ -1160,6 +1183,7 @@ static const u32 prio_to_wmult[40] = {
  #endif
  #define ENQUEUE_REPLENISH      0x08
  #define ENQUEUE_RESTORE        0x10
+#define ENQUEUE_WAKEUP_NEW     0x20
  
  #define DEQUEUE_SLEEP          0x01
  #define DEQUEUE_SAVE           0x02
@@ -1273,6 +1297,17 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
         WARN_ON(!rcu_read_lock_held());
         return rq->idle_state;
  }
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{
+       rq->idle_state_idx = idle_state_idx; /* store the index on this rq */
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+       WARN_ON(!rcu_read_lock_held()); /* warn if not in an RCU read section */
+       return rq->idle_state_idx;
+}
  #else
  static inline void idle_set_state(struct rq *rq,
                                   struct cpuidle_state *idle_state)
@@ -1283,6 +1318,15 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
  {
         return NULL;
  }
+
+static inline void idle_set_state_idx(struct rq *rq, int idle_state_idx)
+{ /* #else stub: intentionally does nothing */
+}
+
+static inline int idle_get_state_idx(struct rq *rq)
+{
+       return -1; /* #else stub: no idle-state index is stored */
+}
  #endif
  
  extern void sysrq_sched_debug_show(void);
@@ -1307,6 +1351,8 @@ unsigned long to_ratio(u64 period, u64 runtime);
  
  extern void init_entity_runnable_average(struct sched_entity *se);
  
+extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
+
  static inline void add_nr_running(struct rq *rq, unsigned count)
  {
         unsigned prev_nr = rq->nr_running;
@@ -1412,10 +1458,117 @@ unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
  }
  #endif
  
+#ifdef CONFIG_SMP
+static inline unsigned long capacity_of(int cpu)
+{
+       return cpu_rq(cpu)->cpu_capacity; /* capacity available to CFS tasks */
+}
+
+static inline unsigned long capacity_orig_of(int cpu)
+{
+       return cpu_rq(cpu)->cpu_capacity_orig; /* capacity at highest frequency */
+}
+
+/*
+ * cpu_util returns the amount of capacity of a CPU that is used by CFS
+ * tasks. The unit of the return value must be the one of capacity so we can
+ * compare the utilization with the capacity of the CPU that is available for
+ * CFS tasks (i.e. cpu_capacity).
+ *
+ * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
+ * recent utilization of currently non-runnable tasks on a CPU. It represents
+ * the amount of utilization of a CPU in the range [0..capacity_orig] where
+ * capacity_orig is the cpu_capacity available at the highest frequency
+ * (arch_scale_freq_capacity()).
+ * The utilization of a CPU converges towards a sum equal to or less than the
+ * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
+ * the running time on this CPU scaled by capacity_curr.
+ *
+ * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
+ * higher than capacity_orig because of unfortunate rounding in
+ * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
+ * the average stabilizes with the new running time. We need to check that the
+ * utilization stays within the range of [0..capacity_orig] and cap it if
+ * necessary. Without utilization capping, a group could be seen as overloaded
+ * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
+ * available capacity. We allow utilization to overshoot capacity_curr (but not
+ * capacity_orig) as it is useful for predicting the capacity required after
+ * task migrations (scheduler-driven DVFS).
+ */
+static inline unsigned long __cpu_util(int cpu, int delta)
+{
+       unsigned long cap_orig = capacity_orig_of(cpu);
+
+       /* Apply the signed adjustment, then clamp to [0..capacity_orig]. */
+       delta += cpu_rq(cpu)->cfs.avg.util_avg;
+       if (delta < 0)
+               return 0;
+
+       return (delta < cap_orig) ? delta : cap_orig;
+}
+
+static inline unsigned long cpu_util(int cpu)
+{
+       return __cpu_util(cpu, 0); /* zero delta: no adjustment applied */
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_CPU_FREQ_GOV_SCHED
+#define capacity_max SCHED_CAPACITY_SCALE
+extern unsigned int capacity_margin;
+extern struct static_key __sched_freq;
+
+static inline bool sched_freq(void)
+{
+       return static_key_false(&__sched_freq); /* static-key test on __sched_freq */
+}
+
+DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);
+void update_cpu_capacity_request(int cpu, bool request);
+
+static inline void set_cfs_cpu_capacity(int cpu, bool request,
+                                       unsigned long capacity)
+{
+       if (per_cpu(cpu_sched_capacity_reqs, cpu).cfs == capacity)
+               return; /* CFS value unchanged: nothing to propagate */
+       per_cpu(cpu_sched_capacity_reqs, cpu).cfs = capacity;
+       update_cpu_capacity_request(cpu, request);
+}
+
+static inline void set_rt_cpu_capacity(int cpu, bool request,
+                                      unsigned long capacity)
+{
+       if (per_cpu(cpu_sched_capacity_reqs, cpu).rt == capacity)
+               return; /* RT value unchanged: nothing to propagate */
+       per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity;
+       update_cpu_capacity_request(cpu, request);
+}
+
+static inline void set_dl_cpu_capacity(int cpu, bool request,
+                                      unsigned long capacity)
+{
+       if (per_cpu(cpu_sched_capacity_reqs, cpu).dl == capacity)
+               return; /* DL value unchanged: nothing to propagate */
+       per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity;
+       update_cpu_capacity_request(cpu, request);
+}
+#else /* !CONFIG_CPU_FREQ_GOV_SCHED: stubs that report false / do nothing */
+static inline bool sched_freq(void) { return false; }
+static inline void set_cfs_cpu_capacity(int cpu, bool request,
+                                       unsigned long capacity)
+{ }
+static inline void set_rt_cpu_capacity(int cpu, bool request,
+                                      unsigned long capacity)
+{ }
+static inline void set_dl_cpu_capacity(int cpu, bool request,
+                                      unsigned long capacity)
+{ }
+#endif /* CONFIG_CPU_FREQ_GOV_SCHED */
+
  static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
  {
         rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
-       sched_avg_update(rq);
  }
  #else
  static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
@@ -1767,3 +1920,16 @@ static inline u64 irq_time_read(int cpu)
  }
  #endif /* CONFIG_64BIT */
  #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
+static inline void account_reset_rq(struct rq *rq)
+{
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+       rq->prev_irq_time = 0;
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+#ifdef CONFIG_PARAVIRT
+       rq->prev_steal_time = 0;
+#endif /* CONFIG_PARAVIRT */
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+       rq->prev_steal_time_rq = 0;
+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */
+}