load average含义 – 源码巴士

命令uptime或者top中，都有load average的显示，那么load average到底是什么含义？

load average真正的出处是 /proc/loadavg

比如可以查看下这个值

$ cat /proc/loadavg 
1.17 0.79 0.61 2/1413 9334

下面分析这个值的获取原理

loadavg_proc_show是显示值的函数，文件位置: fs/proc/loadavg.c

/*
 * Show callback for /proc/loadavg.
 *
 * Emits one line: the three avenrun samples as "int.frac" pairs, then
 * nr_running()/nr_threads, then the pid idr cursor of the caller's active
 * pid namespace minus one.
 */
static int loadavg_proc_show(struct seq_file *m, void *v)
{
	unsigned long avnrun[3];
	unsigned int last_pid;

	/* FIXED_1/200 is added to every sample before it is split for printing. */
	get_avenrun(avnrun, FIXED_1/200, 0);
	last_pid = idr_get_cursor(&task_active_pid_ns(current)->idr) - 1;

	seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %u/%d %d\n",
		   LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
		   LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
		   LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
		   nr_running(), nr_threads, last_pid);

	return 0;
}

/*
 * Create the "loadavg" proc entry with loadavg_proc_show() as its show
 * callback, then mark the entry permanent. Registered via fs_initcall().
 */
static int __init proc_loadavg_init(void)
{
	struct proc_dir_entry *pde = proc_create_single("loadavg", 0, NULL,
							loadavg_proc_show);

	pde_make_permanent(pde);

	return 0;
}
fs_initcall(proc_loadavg_init);

通过proc_create_single创建 proc 文件节点 loadavg，并指定show 函数指针为loadavg_proc_show()。

get_avenrun 用于取值avnrun。FIXED_1/200 相当于定点数的 0.005，先加上它，再截断到小数点后两位，就实现了对小数点后第三位的四舍五入，函数如下

/**
 * get_avenrun - copy out the load average array
 * @loads:	destination array; three entries are written
 * @offset:	value added to each avenrun entry before shifting
 * @shift:	number of bits the biased entry is shifted left
 *
 * avenrun[] is read without locking; the values are estimates at best.
 */
void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
{
	int idx;

	for (idx = 0; idx < 3; idx++)
		loads[idx] = (avenrun[idx] + offset) << shift;
}

LOAD_INT作用是取整数部分，LOAD_FRAC作用是取小数部分

/*
 * Fixed-point helpers for load averages: the low FSHIFT bits of a value
 * hold the fraction, the remaining high bits hold the integer part.
 */
#define FSHIFT		11		/* nr of bits of precision */
#define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */

/* Integer part: drop the FSHIFT fractional bits. */
#define LOAD_INT(x) ((x) >> FSHIFT)
/* Two decimal digits of the fraction: mask the fraction, scale by 100, truncate. */
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)

avnrun 保存的是定点数：低11位（FSHIFT 位）是小数部分，其余高位是整数部分。

整数部分直接右移11位就获取到了。

小数部分先((x) & (FIXED_1-1))，这样将高位的整数部分屏蔽掉，只保留了低11位，然后乘以100，再执行LOAD_INT(x)右移11位，得到的就是小数点后两位

nr_running 是获取有多少个运行线程，函数如下

/*
 * nr_running and nr_context_switches:
 *
 * externally visible scheduler statistics: current number of runnable
 * threads, total number of context switches performed since bootup.
 */
unsigned int nr_running(void)
{
	unsigned int i, sum = 0;

	for_each_online_cpu(i)
		sum += cpu_rq(i)->nr_running;

	return sum;
}

nr_threads 是获取总的线程数量

idr_get_cursor(&task_active_pid_ns(current)->idr) - 1

idr_get_cursor(&task_active_pid_ns(current)->idr) - 1 是为了获取当前 pid 命名空间中最近一次分配的 pid 值

真正的数据在全局数组avenrun中，我们下面分析

calc_global_load 函数是更新avenrun值的地方，文件位置kernel/sched/loadavg.c

/*
 * calc_load - update the avenrun load estimates 10 ticks after the
 * CPUs have updated calc_load_tasks.
 *
 * Called from the global timer code.
 *
 * Steps: return early until jiffies reaches calc_load_update + 10, fold
 * the pending NO_HZ delta into calc_load_tasks, recompute the three
 * avenrun[] entries from the fixed-point active count, then advance
 * calc_load_update by LOAD_FREQ and let calc_global_nohz() run.
 */
void calc_global_load(void)
{
	unsigned long sample_window;
	long active, delta;

	/* Not yet 10 ticks past the scheduled update time: nothing to do. */
	sample_window = READ_ONCE(calc_load_update);
	if (time_before(jiffies, sample_window + 10))
		return;

	/*
	 * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
	 */
	delta = calc_load_nohz_read();
	if (delta)
		atomic_long_add(delta, &calc_load_tasks);

	/* Scale the task count to fixed point; a non-positive count becomes 0. */
	active = atomic_long_read(&calc_load_tasks);
	active = active > 0 ? active * FIXED_1 : 0;

	/* Same sample, three decay factors: EXP_1, EXP_5 and EXP_15. */
	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
	avenrun[2] = calc_load(avenrun[2], EXP_15, active);

	/* The next update is due one LOAD_FREQ after this window. */
	WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);

	/*
	 * In case we went to NO_HZ for multiple LOAD_FREQ intervals
	 * catch up in bulk.
	 */
	calc_global_nohz();
}

calc_load_update 保存下一次更新负载的时间点（以 jiffies 为单位），每次更新后向后推一个周期 LOAD_FREQ；它的初始化位置在 kernel/sched/core.c

void __init sched_init(void)
{
...
calc_load_update = jiffies + LOAD_FREQ;
...
}

/* Period by which calc_load_update is advanced after each load update. */
#define LOAD_FREQ	(5*HZ+1)	/* 5 sec intervals */

if (time_before(jiffies, sample_window + 10)) 说明：如果当前 jiffies 还没有到达 calc_load_update 之后的第 10 个 tick，就直接返回、不做更新；而 calc_load_update 每次更新后都会增加 LOAD_FREQ，所以 avenrun 大约每 5 秒才更新一次

active = atomic_long_read(&calc_load_tasks);

这行代码读取 calc_load_tasks，得到当前 running 状态的进程数 + uninterruptible 状态的进程数

calc_load是计算负载的函数，函数位置include/linux/sched/loadavg.h

/* Decay factors, pre-scaled by FIXED_1, one per averaging window. */
#define EXP_1		1884		/* 1/exp(5sec/1min) as fixed-point */
#define EXP_5		2014		/* 1/exp(5sec/5min) */
#define EXP_15		2037		/* 1/exp(5sec/15min) */

/*
 * One step of the exponential moving average:
 *
 *	a1 = a0 * e + a * (1 - e)
 *
 * @load is a0, @exp is e and @active is a, all in fixed point. When
 * @active is not below @load, FIXED_1 - 1 is added before the division so
 * the result is rounded up instead of truncated.
 */
static inline unsigned long
calc_load(unsigned long load, unsigned long exp, unsigned long active)
{
	unsigned long acc = active * (FIXED_1 - exp) + load * exp;
	unsigned long round_up = (active >= load) ? FIXED_1 - 1 : 0;

	return (acc + round_up) / FIXED_1;
}

参数说明：

load: 上一个周期负载值

exp: EXP_N的值

active 当前running状态的进程数 + uninterrupt状态的进程数

这些值什么含义？

这里用到了指数移动平均线 (EMA)的概念

EMA(Exponential moving average) 本来用的最多的是股市中计算MACD的值

其基本计算公式为：
EMA(n) = α × Cn+ (1-α) × EMA(n-1)

这个反映的是股市中收盘价的走势

计算机中也有不少地方用到了EMA的概念，比如我们这里的负载计算，不过我们的公式稍微发生了一些变化，原理类似

公式如下

avenrun(t) = avenrun(t-1) * exp_n + nr_active * (1 - exp_n)

avenrun(t) 是我们要计算的t时刻的负载

avenrun(t -1) 是上一时刻的负载

nr_active t 时刻active的线程数量

exp_n 是可调节参数

我们回到calc_load这个函数中的三个参数

load：上一个周期负载值，这里就是avenrun(t -1)

exp： EXP_N的值，这里就是exp_n，我们这里1分钟，5分钟，15分钟时，三个值不一样

active：当前running状态的进程数 + uninterrupt状态的进程数，这里是nr_active

avenrun[n] = avenrun[n] * exp_n + nr_active * (1 - exp_n)
nr_active += cpu_rq(cpu)->nr_running + cpu_rq(cpu)->nr_uninterruptible;

使用EMA的优点最明显的如下两个

不需要保存前面所有时刻的实际数值，在计算的过程中是逐步覆盖的，因此可以减少内存的占用
在有些场景下，其实更符合实际情况的，例如股票价格，天气等，最新的值对下一个值影响最大，远一点的次之，越远的值影响越小

最终转换后，公式如下

load(t)= load(t-1) * EXP_N / FIXED_1 + nr_active * (1 - EXP_N/FIXED_1)

其中用到的几个值

/* Fixed-point format: FSHIFT fractional bits, so FIXED_1 represents 1.0. */
#define FSHIFT		11		/* nr of bits of precision */
#define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
#define LOAD_FREQ	(5*HZ+1)	/* 5 sec intervals */
/* Decay factors scaled by FIXED_1, e.g. EXP_1 ~= 2048 * exp(-5/60). */
#define EXP_1		1884		/* 1/exp(5sec/1min) as fixed-point */
#define EXP_5		2014		/* 1/exp(5sec/5min) */
#define EXP_15		2037		/* 1/exp(5sec/15min) */

EXP_N就是EXP_1，EXP_5，EXP_15三个值

到此，我们就介绍完了loadavg这个值如何获取

原文链接：https://blog.csdn.net/lizhijun_buaa/article/details/127600239