Files
cloudron-box/src/metrics.js
T

311 lines
12 KiB
JavaScript
Raw Normal View History

2022-09-14 13:03:14 +02:00
'use strict';
exports = module.exports = {
getSystem,
2025-05-21 17:15:04 +02:00
getSystemStream,
2025-07-01 09:46:24 +02:00
getContainer,
2025-05-21 16:32:52 +02:00
sendToGraphite
2022-09-14 13:03:14 +02:00
};
2022-10-12 22:08:10 +02:00
const apps = require('./apps.js'),
assert = require('assert'),
2022-09-14 13:03:14 +02:00
BoxError = require('./boxerror.js'),
2025-05-21 16:32:52 +02:00
constants = require('./constants.js'),
debug = require('debug')('box:metrics'),
docker = require('./docker.js'),
2025-05-23 16:11:48 +02:00
fs = require('fs'),
2025-05-21 16:32:52 +02:00
net = require('net'),
2022-10-13 22:36:20 +02:00
os = require('os'),
2025-05-21 17:15:04 +02:00
{ Readable } = require('stream'),
2022-09-14 13:03:14 +02:00
safe = require('safetydance'),
2022-10-13 20:32:36 +02:00
services = require('./services.js'),
superagent = require('./superagent.js');
2022-09-14 13:03:14 +02:00
/**
 * Collects a resource usage snapshot for the addon containers, every app
 * container and every per-app redis container.
 *
 * Containers whose stats carry an empty memory_stats object are treated as
 * stopped and are left out of the result.
 *
 * @returns {Promise<Object>} map of container name to
 *   { networkRead, networkWrite, blockRead, blockWrite, memUsed, memMax, cpuUsage }
 */
async function getDockerMetrics() {
    const containerNames = [ 'turn', 'mail', 'mongodb', 'mysql', 'postgresql' ];

    for (const app of await apps.list()) {
        if (app.containerId) containerNames.push(app.containerId); // containerId can be null if app is installing
        if (app.manifest.addons?.redis && app.enableRedis) containerNames.push(`redis-${app.id}`);
    }

    const metrics = {};

    for (const containerName of containerNames) {
        const stats = await docker.getStats(containerName);
        if (Object.keys(stats.memory_stats).length === 0) continue; // the container is stopped. better not to inspect and check State since a race is possible

        // in host mode (turn), networks is missing. also tolerate a container that has no eth0 interface
        const eth0 = stats.networks?.eth0;
        const networkRead = eth0 ? eth0.rx_bytes : 0;
        const networkWrite = eth0 ? eth0.tx_bytes : 0;

        const memUsed = stats.memory_stats.usage;
        const memMax = stats.memory_stats.limit;

        // docker reports null here when no block io has been accounted yet
        const blkioStats = stats.blkio_stats?.io_service_bytes_recursive ?? [];
        // the op name is matched case-insensitively ('read' as well as 'Read')
        const sumBlockBytes = (op) => blkioStats
            .filter((entry) => String(entry.op).toLowerCase() === op)
            .reduce((sum, entry) => sum + entry.value, 0);
        const blockRead = sumBlockBytes('read');
        const blockWrite = sumBlockBytes('write');

        // total_usage is in nanoseconds per the Docker Engine API. convert to msecs (to match system metrics)
        const cpuUsage = stats.cpu_stats.cpu_usage.total_usage / 1e6;

        metrics[containerName] = { networkRead, networkWrite, blockRead, blockWrite, memUsed, memMax, cpuUsage };
    }

    return metrics;
}
/**
 * Reads /proc/meminfo and computes the memory and swap currently in use.
 *
 * memoryUsed is MemTotal - MemFree - Buffers - Cached, swapUsed is
 * SwapTotal - SwapFree. /proc/meminfo values are in kB, results are in bytes.
 *
 * @returns {Promise<{memoryUsed: number, swapUsed: number}>}
 * @throws {BoxError} EXTERNAL_ERROR when one of the required fields is missing
 */
async function getMemoryMetrics() {
    const output = await fs.promises.readFile('/proc/meminfo', { encoding: 'utf8' });

    // returns the value of a meminfo field in bytes, or null when the field is not present.
    // the pattern is anchored to the line start so that 'Cached' does not match 'SwapCached'
    const readBytes = (field) => {
        const match = output.match(new RegExp(`^${field}:\\s+(\\d+)`, 'm'));
        return match ? Number.parseInt(match[1], 10) * 1024 : null;
    };

    const memoryFields = [ 'MemTotal', 'MemFree', 'Buffers', 'Cached' ].map(readBytes);
    if (memoryFields.includes(null)) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Could not find memory used');
    const [ memTotal, memFree, buffers, cached ] = memoryFields;
    const memoryUsed = memTotal - memFree - buffers - cached;

    const swapFields = [ 'SwapTotal', 'SwapFree' ].map(readBytes);
    if (swapFields.includes(null)) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Could not find swap used');
    const [ swapTotal, swapFree ] = swapFields;
    const swapUsed = swapTotal - swapFree;

    return { memoryUsed, swapUsed };
}
/**
 * Sums the user and sys times reported by os.cpus() across all cpus.
 *
 * @returns {Promise<{userMsecs: number, sysMsecs: number}>} times spent since system start
 */
async function getCpuMetrics() {
    let userMsecs = 0;
    let sysMsecs = 0;

    // start from 0 so that an empty os.cpus() list yields zeros instead of a reduce() TypeError
    for (const { times } of os.cpus()) {
        userMsecs += times.user;
        sysMsecs += times.sys;
    }

    return { userMsecs, sysMsecs }; // these values are the times spent since system start
}
/**
 * Collects system (memory, swap, cpu) and per-container metrics and writes
 * them to graphite at 127.0.0.1:GRAPHITE_PORT, one "path value timestamp"
 * line per metric.
 *
 * Socket errors are only logged: the returned promise resolves on 'error' as
 * well as on 'end'. Failures while collecting the metrics still reject.
 *
 * @returns {Promise<void>}
 */
async function sendToGraphite() {
    const { memoryUsed, swapUsed } = await getMemoryMetrics();
    const { userMsecs, sysMsecs } = await getCpuMetrics();

    const datapoints = [
        { path: 'cloudron.system.memory-used', value: memoryUsed },
        { path: 'cloudron.system.swap-used', value: swapUsed },
        { path: 'cloudron.system.cpu-user', value: userMsecs },
        { path: 'cloudron.system.cpu-sys', value: sysMsecs }
    ];

    // graphite path suffix -> key in the per-container metrics object
    const containerFields = [
        [ 'network-read', 'networkRead' ],
        [ 'network-write', 'networkWrite' ],
        [ 'blockio-read', 'blockRead' ],
        [ 'blockio-write', 'blockWrite' ],
        [ 'mem-used', 'memUsed' ],
        [ 'mem-max', 'memMax' ],
        [ 'cpu-usage', 'cpuUsage' ]
    ];

    const dockerMetrics = await getDockerMetrics();
    for (const name of Object.keys(dockerMetrics)) {
        for (const [ suffix, key ] of containerFields) {
            datapoints.push({ path: `cloudron.container-${name}.${suffix}`, value: dockerMetrics[name][key] });
        }
    }

    return new Promise((resolve) => {
        const socket = new net.Socket();

        const onConnected = () => {
            debug('connected to graphite');

            // all datapoints of one run share the same timestamp (in seconds)
            const timestamp = Math.floor(Date.now() / 1000);
            datapoints.forEach(({ path, value }) => socket.write(`${path} ${value} ${timestamp}\n`));

            socket.end();
        };

        const onError = (error) => {
            debug(error);
            resolve();
        };

        const onEnd = () => {
            debug('sent to graphite');
            resolve();
        };

        socket.connect(constants.GRAPHITE_PORT, '127.0.0.1', onConnected);
        socket.on('error', onError);
        socket.on('end', onEnd);
    });
}
// for testing locally: curl 'http://${graphite-ip}:8000/graphite-web/render?format=json&from=-1min&target=absolute(collectd.localhost.du-docker.capacity-usage)'
// the datapoint is (value, timestamp) https://graphite.readthedocs.io/en/latest/
/**
 * Resolves the render endpoint of the graphite container.
 *
 * @returns {Promise<string>} url of the form http://<ip>:8000/graphite-web/render
 * @throws {BoxError} INACTIVE when the graphite container does not exist or has no IP
 *   in the cloudron network. Any other docker.inspect error is rethrown as is.
 */
async function getGraphiteUrl() {
    const [error, result] = await safe(docker.inspect('graphite'));

    // callers use the return value as a url, so a missing container has to be an error and not a status object
    if (error && error.reason === BoxError.NOT_FOUND) throw new BoxError(BoxError.INACTIVE, 'graphite service is not running');
    if (error) throw error;

    const ip = safe.query(result, 'NetworkSettings.Networks.cloudron.IPAddress', null);
    if (!ip) throw new BoxError(BoxError.INACTIVE, 'Error getting IP of graphite service');

    return `http://${ip}:8000/graphite-web/render`;
}
2022-09-14 13:03:14 +02:00
2025-07-01 09:46:24 +02:00
/**
 * Queries graphite for the metrics of a single container.
 *
 * One render request is made per target, sequentially.
 *
 * @param {string} name - container name as used in the graphite path cloudron.container-<name>
 * @param {object} options - { fromSecs, intervalSecs, noNullPoints }
 * @returns {Promise<object>} cpu, memory, blockRead, blockWrite, networkRead and networkWrite are
 *   datapoint lists [[value, ts], ...]. the *Total fields are the value of the first datapoint of
 *   the corresponding "max" series (0 when absent). cpuCount is the number of cpus of this host
 * @throws {BoxError} NETWORK_ERROR when a request fails, EXTERNAL_ERROR on a non-200 response
 */
async function getContainer(name, options) {
    assert.strictEqual(typeof name, 'string');
    assert.strictEqual(typeof options, 'object');

    const { fromSecs, intervalSecs, noNullPoints } = options;
    const graphiteUrl = await getGraphiteUrl();

    const prefix = `cloudron.container-${name}`;
    const summarize = (metric, func) => `summarize(${prefix}.${metric}, "${intervalSecs}s", "${func}")`;

    const targets = [
        // perSecond is nonNegativeDerivative over time . this value is the cpu usage in msecs .
        // (cpu usage msecs) / (cpus * 1000) is the percent but over all cpus. times 100 is the percent.
        // but the y-scale is cpus times 100. so, we only need to scale by 0.1
        `scale(perSecond(${prefix}.cpu-usage),0.1)`,
        summarize('mem-used', 'avg'),
        summarize('blockio-read', 'sum'),
        summarize('blockio-write', 'sum'),
        summarize('network-read', 'sum'),
        summarize('network-write', 'sum'),
        summarize('blockio-read', 'max'),
        summarize('blockio-write', 'max'),
        summarize('network-read', 'max'),
        summarize('network-write', 'max')
    ];

    const series = [];

    for (const target of targets) {
        const query = { target, format: 'json', from: `-${fromSecs}s`, until: 'now', noNullPoints: Boolean(noNullPoints) };
        const request = superagent.get(graphiteUrl).query(query).timeout(30 * 1000).ok(() => true);

        const [error, response] = await safe(request);
        if (error) throw new BoxError(BoxError.NETWORK_ERROR, error);
        if (response.status !== 200) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unknown error with ${target}: ${response.status} ${response.text}`);

        const firstSeries = response.body[0];
        series.push(firstSeries && firstSeries.datapoints ? firstSeries.datapoints : []);
    }

    // each entry of series is datapoints[[value, ts], [value, ts], ...]. same order as targets
    const [ cpu, memory, blockRead, blockWrite, networkRead, networkWrite, blockReadMax, blockWriteMax, networkReadMax, networkWriteMax ] = series;

    // value of the first datapoint, 0 when there is no datapoint or the value is falsy (null)
    const firstValue = (datapoints) => {
        const first = datapoints[0];
        return first && first[0] ? first[0] : 0;
    };

    return {
        cpu,
        memory,
        blockRead,
        blockWrite,
        networkRead,
        networkWrite,
        blockReadTotal: firstValue(blockReadMax),
        blockWriteTotal: firstValue(blockWriteMax),
        networkReadTotal: firstValue(networkReadMax),
        networkWriteTotal: firstValue(networkWriteMax),
        cpuCount: os.cpus().length
    };
}
2025-05-21 17:15:04 +02:00
/**
 * Queries graphite for the system wide cpu, memory and swap series.
 *
 * One render request is made per target, sequentially.
 *
 * @param {object} options - { fromSecs, intervalSecs, noNullPoints }
 * @returns {Promise<{cpu: Array, memory: Array, swap: Array}>} datapoint lists [[value, ts], ...]
 * @throws {BoxError} NETWORK_ERROR when a request fails, EXTERNAL_ERROR on a non-200 response
 */
async function readSystemFromGraphite(options) {
    assert.strictEqual(typeof options, 'object');

    const { fromSecs, intervalSecs, noNullPoints } = options;
    const graphiteUrl = await getGraphiteUrl();

    const summarizeAvg = (metric) => `summarize(cloudron.system.${metric}, "${intervalSecs}s", "avg")`;

    // example: curl 'http://172.18.30.5:8000/graphite-web/render?target=cloudron.system.cpu-user&target=cloudron.system.cpu-sys&format=json&from=-1min&until=now&noNullPoints=false' | python3 -m json.tool
    const targets = [
        // perSecond is nonNegativeDerivative over time . this value is the cpu usage in msecs .
        // (cpu usage msecs) / (cpus * 1000) is the percent but over all cpus. times 100 is the percent.
        // but the y-scale is cpus times 100. so, we only need to scale by 0.1
        'scale(perSecond(sumSeries(cloudron.system.cpu-user,cloudron.system.cpu-sys)),0.1)',
        summarizeAvg('memory-used'),
        summarizeAvg('swap-used')
    ];

    const series = [];

    for (const target of targets) {
        const query = { target, format: 'json', from: `-${fromSecs}s`, until: 'now', noNullPoints: Boolean(noNullPoints) };
        const request = superagent.get(graphiteUrl).query(query).timeout(30 * 1000).ok(() => true);

        const [error, response] = await safe(request);
        if (error) throw new BoxError(BoxError.NETWORK_ERROR, error);
        if (response.status !== 200) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unknown error with ${target}: ${response.status} ${response.text}`);

        const firstSeries = response.body[0];
        series.push(firstSeries && firstSeries.datapoints ? firstSeries.datapoints : []);
    }

    // same order as targets
    const [ cpu, memory, swap ] = series;

    return { cpu, memory, swap };
}
2025-05-20 19:09:12 +02:00
/**
 * Gathers the system series together with the per-app and per-service
 * container series from graphite. Containers are queried one after the other.
 *
 * @param {object} options - passed through to readSystemFromGraphite() and getContainer()
 * @returns {Promise<object>} { cpu, memory, swap, apps, services, cpuCount } where apps and
 *   services map an id to the getContainer() result for that id
 */
async function getSystem(options) {
    assert.strictEqual(typeof options, 'object');

    const { cpu, memory, swap } = await readSystemFromGraphite(options);

    const appStats = {};
    const appList = await apps.list();
    for (const { id } of appList) {
        appStats[id] = await getContainer(id, options);
    }

    const serviceStats = {};
    const serviceIds = await services.listServices();
    for (const serviceId of serviceIds) {
        serviceStats[serviceId] = await getContainer(serviceId, options);
    }

    const cpuCount = os.cpus().length;

    return { cpu, memory, swap, apps: appStats, services: serviceStats, cpuCount };
}
2025-05-21 17:15:04 +02:00
2025-05-22 11:17:31 +02:00
/**
 * Creates a readable stream that emits a live system metrics sample at a fixed interval.
 *
 * Every sample is pushed as one JSON string of the form
 * { cpu: [percent|null, ts], memory: [bytes, ts], swap: [bytes, ts] } where ts is in seconds.
 * cpu is null when there is no previous sample to diff against (the first sample and the
 * first sample after a failed collection).
 *
 * Destroying the stream stops the sampling.
 *
 * @param {object} options - { intervalMsecs } sampling interval, defaults to 5000
 * @returns {Promise<Readable>}
 */
async function getSystemStream(options) {
    assert.strictEqual(typeof options, 'object');

    const INTERVAL_MSECS = options.intervalMsecs || 5000;

    let intervalId = null;
    let oldCpuMetrics = null;

    const metricsStream = new Readable({
        read(/*size*/) { /* ignored, we push via interval */ },
        destroy(error, callback) {
            clearInterval(intervalId);
            callback(error);
        }
    });

    intervalId = setInterval(async () => {
        try {
            const memoryMetrics = await getMemoryMetrics();
            const cpuMetrics = await getCpuMetrics();

            // the stream may have been destroyed while the metrics were being collected
            if (metricsStream.destroyed) return;

            const cpuPercent = oldCpuMetrics ? (cpuMetrics.userMsecs + cpuMetrics.sysMsecs - oldCpuMetrics.userMsecs - oldCpuMetrics.sysMsecs) * 0.1 / (INTERVAL_MSECS/1000) : null;
            oldCpuMetrics = cpuMetrics;

            const now = Date.now() / 1000;
            metricsStream.push(JSON.stringify({
                cpu: [ cpuPercent, now ],
                memory: [ memoryMetrics.memoryUsed, now ],
                swap: [ memoryMetrics.swapUsed, now ],
            }));
        } catch (error) {
            // nothing awaits this callback. without the catch, a failed read becomes an unhandled rejection.
            // skip this sample and drop the baseline, so that the next cpu delta is not computed across the gap
            oldCpuMetrics = null;
            debug(`getSystemStream: could not collect metrics: ${error.message}`);
        }
    }, INTERVAL_MSECS);

    return metricsStream;
}