apps: add live metrics route
This commit is contained in:
+98
-32
@@ -5,6 +5,7 @@ exports = module.exports = {
|
||||
getSystemStream,
|
||||
|
||||
getContainer,
|
||||
getContainerStream,
|
||||
|
||||
sendToGraphite
|
||||
};
|
||||
@@ -23,7 +24,28 @@ const apps = require('./apps.js'),
|
||||
services = require('./services.js'),
|
||||
superagent = require('./superagent.js');
|
||||
|
||||
async function getDockerMetrics() {
|
||||
/**
 * Reads one snapshot of the resource counters of a single container.
 *
 * @param {string} name - container name handed to docker.getStats()
 * @returns {Promise<object|null>} { networkRead, networkWrite, blockRead, blockWrite, memoryUsed, memoryMax, cpuUsageMsecs }
 *      or null when the stats carry no memory information (the container is stopped)
 */
async function readContainerMetric(name) {
    assert.strictEqual(typeof name, 'string');

    const stats = await docker.getStats(name);
    if (Object.keys(stats.memory_stats).length === 0) return null; // the container is stopped. better not to inspect and check State since a race is possible

    // in host mode (turn), networks is missing. a container without an eth0 interface is treated the same way instead of throwing
    const eth0 = stats.networks && stats.networks.eth0 ? stats.networks.eth0 : null;
    const networkRead = eth0 ? eth0.rx_bytes : 0;
    const networkWrite = eth0 ? eth0.tx_bytes : 0;

    const memoryUsed = stats.memory_stats.usage;
    const memoryMax = stats.memory_stats.limit;

    // treat a null/missing entry list as "no block i/o" instead of crashing on .filter()
    const blkioStats = (stats.blkio_stats && stats.blkio_stats.io_service_bytes_recursive) || [];
    const sumByOp = (op) => blkioStats.reduce((sum, entry) => entry.op === op ? sum + entry.value : sum, 0);
    const blockRead = sumByOp('read');
    const blockWrite = sumByOp('write');

    // NOTE(review): the Docker API documents total_usage in nanoseconds, in which case / 1000 yields microseconds and not msecs - confirm before changing, stored values depend on this scale
    const cpuUsageMsecs = stats.cpu_stats.cpu_usage.total_usage / 1000; // convert to msecs (to match system metrics)

    return { networkRead, networkWrite, blockRead, blockWrite, memoryUsed, memoryMax, cpuUsageMsecs };
}
|
||||
|
||||
async function readContainerMetrics() {
|
||||
const allAddons = [ 'turn', 'mail', 'mongodb', 'mysql', 'postgresql' ];
|
||||
|
||||
const containerNames = allAddons;
|
||||
@@ -34,28 +56,14 @@ async function getDockerMetrics() {
|
||||
|
||||
const metrics = {};
|
||||
for (const containerName of containerNames) {
|
||||
const stats = await docker.getStats(containerName);
|
||||
if (Object.keys(stats.memory_stats).length === 0) continue; // the container is stopped. better not to inspect and check State since a race is possible
|
||||
|
||||
const networkRead = stats.networks ? stats.networks.eth0.rx_bytes : 0; // in host mode (turn), networks is missing
|
||||
const networkWrite = stats.networks ? stats.networks.eth0.tx_bytes : 0; // in host mode (turn), networks is missing
|
||||
|
||||
const memUsed = stats.memory_stats.usage;
|
||||
const memMax = stats.memory_stats.limit;
|
||||
|
||||
const blkioStats = stats.blkio_stats.io_service_bytes_recursive;
|
||||
const blockRead = blkioStats.filter(entry => entry.op === 'read').reduce((sum, entry) => sum + entry.value, 0);
|
||||
const blockWrite = blkioStats.filter(entry => entry.op === 'write').reduce((sum, entry) => sum + entry.value, 0);
|
||||
|
||||
const cpuUsage = stats.cpu_stats.cpu_usage.total_usage / 1000; // convert to msecs (to match system metrics)
|
||||
|
||||
metrics[containerName] = { networkRead, networkWrite, blockRead, blockWrite, memUsed, memMax, cpuUsage };
|
||||
const stats = await readContainerMetric(containerName);
|
||||
if (stats) metrics[containerName] = stats;
|
||||
}
|
||||
|
||||
return metrics;
|
||||
}
|
||||
|
||||
async function getMemoryMetrics() {
|
||||
async function readMemoryMetrics() {
|
||||
const output = await fs.promises.readFile('/proc/meminfo', { encoding: 'utf8' });
|
||||
|
||||
const totalMemoryMatch = output.match(/^MemTotal:\s+(\d+)/m);
|
||||
@@ -80,7 +88,7 @@ async function getMemoryMetrics() {
|
||||
};
|
||||
}
|
||||
|
||||
async function getCpuMetrics() {
|
||||
async function readCpuMetrics() {
|
||||
const cpus = os.cpus();
|
||||
const userMsecs = cpus.map(c => c.times.user).reduce((p, c) => p+c);
|
||||
const sysMsecs = cpus.map(c => c.times.sys).reduce((p, c) => p+c);
|
||||
@@ -93,24 +101,24 @@ async function sendToGraphite() {
|
||||
|
||||
const graphiteMetrics = [];
|
||||
|
||||
const memoryMetrics = await getMemoryMetrics();
|
||||
const memoryMetrics = await readMemoryMetrics();
|
||||
graphiteMetrics.push({ path: `cloudron.system.memory-used`, value: memoryMetrics.memoryUsed });
|
||||
graphiteMetrics.push({ path: `cloudron.system.swap-used`, value: memoryMetrics.swapUsed });
|
||||
|
||||
const cpuMetrics = await getCpuMetrics();
|
||||
const cpuMetrics = await readCpuMetrics();
|
||||
graphiteMetrics.push({ path: `cloudron.system.cpu-user`, value: cpuMetrics.userMsecs });
|
||||
graphiteMetrics.push({ path: `cloudron.system.cpu-sys`, value: cpuMetrics.sysMsecs });
|
||||
|
||||
const dockerMetrics = await getDockerMetrics();
|
||||
const dockerMetrics = await readContainerMetrics();
|
||||
for (const [name, value] of Object.entries(dockerMetrics)) {
|
||||
graphiteMetrics.push(
|
||||
{ path: `cloudron.container-${name}.network-read`, value: value.networkRead },
|
||||
{ path: `cloudron.container-${name}.network-write`, value: value.networkWrite },
|
||||
{ path: `cloudron.container-${name}.blockio-read`, value: value.blockRead },
|
||||
{ path: `cloudron.container-${name}.blockio-write`, value: value.blockWrite },
|
||||
{ path: `cloudron.container-${name}.mem-used`, value: value.memUsed },
|
||||
{ path: `cloudron.container-${name}.mem-max`, value: value.memMax },
|
||||
{ path: `cloudron.container-${name}.cpu-usage`, value: value.cpuUsage },
|
||||
{ path: `cloudron.container-${name}.memory-used`, value: value.memoryUsed },
|
||||
{ path: `cloudron.container-${name}.memory-max`, value: value.memoryMax },
|
||||
{ path: `cloudron.container-${name}.cpu-usage`, value: value.cpuUsageMsecs },
|
||||
);
|
||||
}
|
||||
|
||||
@@ -165,11 +173,15 @@ async function getContainer(name, options) {
|
||||
// (cpu usage msecs) / (cpus * 1000) is the percent but over all cpus. times 100 is the percent.
|
||||
// but the y-scale is cpus times 100. so, we only need to scale by 0.1
|
||||
`scale(perSecond(cloudron.container-${name}.cpu-usage),0.1)`,
|
||||
`summarize(cloudron.container-${name}.mem-used, "${intervalSecs}s", "avg")`,
|
||||
`summarize(cloudron.container-${name}.blockio-read, "${intervalSecs}s", "sum")`,
|
||||
`summarize(cloudron.container-${name}.blockio-write, "${intervalSecs}s", "sum")`,
|
||||
`summarize(cloudron.container-${name}.network-read, "${intervalSecs}s", "sum")`,
|
||||
`summarize(cloudron.container-${name}.network-write, "${intervalSecs}s", "sum")`,
|
||||
`summarize(cloudron.container-${name}.memory-used, "${intervalSecs}s", "avg")`,
|
||||
|
||||
// get the rate in interval window
|
||||
`summarize(perSecond(cloudron.container-${name}.blockio-read), "${intervalSecs}s", "avg")`,
|
||||
`summarize(perSecond(cloudron.container-${name}.blockio-write), "${intervalSecs}s", "avg")`,
|
||||
`summarize(perSecond(cloudron.container-${name}.network-read), "${intervalSecs}s", "avg")`,
|
||||
`summarize(perSecond(cloudron.container-${name}.network-write), "${intervalSecs}s", "avg")`,
|
||||
|
||||
// just get the max in interval window for absolute numbers
|
||||
`summarize(cloudron.container-${name}.blockio-read, "${intervalSecs}s", "max")`,
|
||||
`summarize(cloudron.container-${name}.blockio-write, "${intervalSecs}s", "max")`,
|
||||
`summarize(cloudron.container-${name}.network-read, "${intervalSecs}s", "max")`,
|
||||
@@ -292,8 +304,11 @@ async function getSystemStream(options) {
|
||||
});
|
||||
|
||||
intervalId = setInterval(async () => {
|
||||
const memoryMetrics = await getMemoryMetrics();
|
||||
const cpuMetrics = await getCpuMetrics();
|
||||
const [memoryResult, cpuResult] = await Promise.allSettled([ readMemoryMetrics(), readCpuMetrics() ]); // never throws
|
||||
if (memoryResult.status !== 'fulfilled' || cpuMetrics.status !== 'fulfilled') return metricsStream.destroy(memoryResult.reason || cpuMetrics.reason);
|
||||
|
||||
const memoryMetrics = memoryResult.value;
|
||||
const cpuMetrics = cpuResult.value;
|
||||
|
||||
const cpuPercent = oldCpuMetrics ? (cpuMetrics.userMsecs + cpuMetrics.sysMsecs - oldCpuMetrics.userMsecs - oldCpuMetrics.sysMsecs) * 0.1 / (INTERVAL_MSECS/1000) : null;
|
||||
oldCpuMetrics = cpuMetrics;
|
||||
@@ -308,3 +323,54 @@ async function getSystemStream(options) {
|
||||
|
||||
return metricsStream;
|
||||
}
|
||||
|
||||
/**
 * Creates a Readable stream that periodically emits a JSON encoded sample of a container's metrics.
 *
 * Every sample has cpu, memory, blockRead/blockWrite and networkRead/networkWrite as [ value, timestampSecs ]
 * pairs, the matching *Total absolute counters and cpuCount. Rate values (and cpu) are null in the first
 * sample since there is no previous reading to diff against.
 *
 * @param {string} name - container name passed to readContainerMetric()
 * @param {object} options - options.intervalMsecs is the sampling period (defaults to 5000)
 * @returns {Promise<Readable>} the stream. destroying it stops the sampling timer
 */
async function getContainerStream(name, options) {
    assert.strictEqual(typeof name, 'string');
    assert.strictEqual(typeof options, 'object');

    const INTERVAL_MSECS = options.intervalMsecs || 5000;
    const intervalSecs = INTERVAL_MSECS / 1000;
    let intervalId = null, oldMetrics = null;

    const metricsStream = new Readable({
        read(/*size*/) { /* ignored, we push via interval */ },
        destroy(error, callback) {
            clearInterval(intervalId);
            callback(error);
        }
    });

    intervalId = setInterval(async () => {
        const [error, metrics] = await safe(readContainerMetric(name));
        if (metricsStream.destroyed) return; // torn down while we were waiting for the stats
        if (error) return metricsStream.destroy(error);

        // readContainerMetric() gives null when the container is stopped. skip the sample instead of throwing in
        // this timer callback. forget the previous reading so that rates are not computed across the gap
        if (!metrics) {
            oldMetrics = null;
            return;
        }

        const { networkRead, networkWrite, blockRead, blockWrite, memoryUsed, cpuUsageMsecs } = metrics;

        // per second rate of a counter since the previous sample. null when there is no previous sample
        const rate = (current, key) => oldMetrics ? (current - oldMetrics[key]) / intervalSecs : null;

        const cpuPercent = oldMetrics ? (cpuUsageMsecs - oldMetrics.cpuUsageMsecs) * 0.1 / intervalSecs : null;
        const blockReadRate = rate(blockRead, 'blockRead');
        const blockWriteRate = rate(blockWrite, 'blockWrite');
        const networkReadRate = rate(networkRead, 'networkRead');
        const networkWriteRate = rate(networkWrite, 'networkWrite');

        oldMetrics = metrics;

        const now = Date.now() / 1000;
        metricsStream.push(JSON.stringify({
            cpu: [ cpuPercent, now ],
            memory: [ memoryUsed, now ],

            blockRead: [ blockReadRate, now ],
            blockWrite: [ blockWriteRate, now ],

            networkRead: [ networkReadRate, now ],
            networkWrite: [ networkWriteRate, now ],

            blockReadTotal: [ blockRead, now ],
            blockWriteTotal: [ blockWrite, now ],
            networkReadTotal: [ networkRead, now ],
            networkWriteTotal: [ networkWrite, now ],
            cpuCount: os.cpus().length
        }));
    }, INTERVAL_MSECS);

    return metricsStream;
}
|
||||
|
||||
Reference in New Issue
Block a user