@@ -275,6 +275,21 @@ struct load_weight {
u32 inv_weight;
};
+/**
+ * Estimation Utilization for FAIR tasks.
+ *
+ * Support data structure to track an Exponential Weighted Moving Average
+ * (EWMA) of a FAIR task's utilization. New samples are added to the moving
+ * average each time a task completes an activation. Sample's weight is
+ * chosen so that the EWMA will be relatively insensitive to transient changes
+ * to the task's workload.
+ */
+struct util_est {
+ unsigned int last;
+ unsigned int ewma;
+#define UTIL_EST_WEIGHT_SHIFT 2
+};
+
/*
* The load_avg/util_avg accumulates an infinite geometric series
* (see __update_load_avg() in kernel/sched/fair.c).
@@ -336,6 +351,7 @@ struct sched_avg {
unsigned long load_avg;
unsigned long runnable_load_avg;
unsigned long util_avg;
+ struct util_est util_est;
};
struct sched_statistics {
@@ -567,6 +567,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
cfs_rq->avg.runnable_load_avg);
SEQ_printf(m, " .%-30s: %lu\n", "util_avg",
cfs_rq->avg.util_avg);
+ SEQ_printf(m, " .%-30s: %lu\n", "util_est_runnable",
+ cfs_rq->util_est_runnable);
SEQ_printf(m, " .%-30s: %ld\n", "removed.load_avg",
cfs_rq->removed.load_avg);
SEQ_printf(m, " .%-30s: %ld\n", "removed.util_avg",
@@ -1018,6 +1020,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(se.avg.runnable_load_avg);
P(se.avg.util_avg);
P(se.avg.last_update_time);
+ P(se.avg.util_est.ewma);
+ P(se.avg.util_est.last);
#endif
P(policy);
P(prio);
@@ -5193,6 +5193,20 @@ static inline void hrtick_update(struct rq *rq)
}
#endif
+static inline unsigned long task_util(struct task_struct *p);
+static inline unsigned long task_util_est(struct task_struct *p);
+
+static inline void util_est_enqueue(struct task_struct *p)
+{
+ struct cfs_rq *cfs_rq = &task_rq(p)->cfs;
+
+ if (!sched_feat(UTIL_EST))
+ return;
+
+ /* Update root cfs_rq's estimated utilization */
+ cfs_rq->util_est_runnable += task_util_est(p);
+}
+
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
@@ -5245,9 +5259,70 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!se)
add_nr_running(rq, 1);
+ util_est_enqueue(p);
hrtick_update(rq);
}
+static inline void util_est_dequeue(struct task_struct *p, int flags)
+{
+ struct cfs_rq *cfs_rq = &task_rq(p)->cfs;
+ unsigned long util_last = task_util(p);
+ bool sleep = flags & DEQUEUE_SLEEP;
+ unsigned long ewma;
+ long util_est = 0;
+
+ if (!sched_feat(UTIL_EST))
+ return;
+
+ /*
+ * Update root cfs_rq's estimated utilization
+ *
+ * If *p is the last task then the root cfs_rq's estimated utilization
+ * of a CPU is 0 by definition.
+ */
+ if (cfs_rq->nr_running) {
+ util_est = READ_ONCE(cfs_rq->util_est_runnable);
+ util_est -= min_t(long, util_est, task_util_est(p));
+ }
+ WRITE_ONCE(cfs_rq->util_est_runnable, util_est);
+
+ /*
+ * Skip update of task's estimated utilization when the task has not
+ * yet completed an activation, e.g. being migrated.
+ */
+ if (!sleep)
+ return;
+
+ /*
+ * Skip update of task's estimated utilization when its EWMA is already
+ * ~1% close to its last activation value.
+ */
+ util_est = p->util_est.ewma;
+ if (abs(util_est - util_last) <= (SCHED_CAPACITY_SCALE / 100))
+ return;
+
+ /*
+ * Update Task's estimated utilization
+ *
+ * When *p completes an activation we can consolidate another sample
+ * about the task size. This is done by storing the last PELT value
+ * for this task and using this value to load another sample in the
+ * exponential weighted moving average:
+ *
+ * ewma(t) = w * task_util(p) + (1 - w) ewma(t-1)
+ * = w * task_util(p) + ewma(t-1) - w * ewma(t-1)
+ * = w * (task_util(p) + ewma(t-1) / w - ewma(t-1))
+ *
+ * Where 'w' is the weight of new samples, which is configured to be
+ * 0.25, thus making w=1/4
+ */
+ p->se.avg.util_est.last = util_last;
+ ewma = READ_ONCE(p->se.avg.util_est.ewma);
+ ewma = util_last + (ewma << UTIL_EST_WEIGHT_SHIFT) - ewma;
+ ewma >>= UTIL_EST_WEIGHT_SHIFT;
+ WRITE_ONCE(p->se.avg.util_est.ewma, ewma);
+}
+
static void set_next_buddy(struct sched_entity *se);
/*
@@ -5304,6 +5379,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!se)
sub_nr_running(rq, 1);
+ util_est_dequeue(p, flags);
hrtick_update(rq);
}
@@ -5767,7 +5843,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
return affine;
}
-static inline unsigned long task_util(struct task_struct *p);
static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
@@ -6262,6 +6337,11 @@ static inline unsigned long task_util(struct task_struct *p)
return p->se.avg.util_avg;
}
+static inline unsigned long task_util_est(struct task_struct *p)
+{
+ return max(p->se.avg.util_est.ewma, p->se.avg.util_est.last);
+}
+
/*
* cpu_util_wake: Compute cpu utilization with any contributions from
* the waking task p removed.
@@ -85,3 +85,8 @@ SCHED_FEAT(ATTACH_AGE_LOAD, true)
SCHED_FEAT(WA_IDLE, true)
SCHED_FEAT(WA_WEIGHT, true)
SCHED_FEAT(WA_BIAS, true)
+
+/*
+ * UtilEstimation. Use estimated CPU utilization.
+ */
+SCHED_FEAT(UTIL_EST, false)
@@ -470,6 +470,7 @@ struct cfs_rq {
* CFS load tracking
*/
struct sched_avg avg;
+ unsigned long util_est_runnable;
#ifndef CONFIG_64BIT
u64 load_last_update_time_copy;
#endif