apps: add live metrics route

This commit is contained in:
Girish Ramakrishnan
2025-07-01 11:49:37 +02:00
parent 39c0af46b0
commit a357f5a1b8
3 changed files with 127 additions and 32 deletions
+98 -32
View File
@@ -5,6 +5,7 @@ exports = module.exports = {
getSystemStream,
getContainer,
getContainerStream,
sendToGraphite
};
@@ -23,7 +24,28 @@ const apps = require('./apps.js'),
services = require('./services.js'),
superagent = require('./superagent.js');
async function getDockerMetrics() {
/**
 * Reads a single resource usage snapshot of a docker container.
 *
 * @param {string} name - container name, passed as-is to docker.getStats()
 * @returns {Promise<object|null>} { networkRead, networkWrite, blockRead, blockWrite, memoryUsed, memoryMax, cpuUsageMsecs }
 *   or null when the container reports no memory stats (i.e. it is stopped)
 */
async function readContainerMetric(name) {
    assert.strictEqual(typeof name, 'string');

    const stats = await docker.getStats(name);
    if (Object.keys(stats.memory_stats).length === 0) return null; // the container is stopped. better not to inspect and check State since a race is possible

    // in host mode (turn), networks is missing. a container without an eth0 entry is treated the same way instead of throwing
    const networkRead = stats.networks?.eth0?.rx_bytes ?? 0;
    const networkWrite = stats.networks?.eth0?.tx_bytes ?? 0;

    const memoryUsed = stats.memory_stats.usage;
    const memoryMax = stats.memory_stats.limit;

    // docker may report null here instead of an empty list (e.g. no block io recorded yet). count that as 0 bytes
    const blkioStats = stats.blkio_stats.io_service_bytes_recursive ?? [];
    const sumBytesOf = (op) => blkioStats.filter(entry => entry.op === op).reduce((sum, entry) => sum + entry.value, 0);
    const blockRead = sumBytesOf('read');
    const blockWrite = sumBytesOf('write');

    // NOTE(review): docker documents total_usage in nanoseconds, so dividing by 1000 may not give msecs - confirm against docker.getStats() before changing, graphs depend on this scale
    const cpuUsageMsecs = stats.cpu_stats.cpu_usage.total_usage / 1000; // convert to msecs (to match system metrics)

    return { networkRead, networkWrite, blockRead, blockWrite, memoryUsed, memoryMax, cpuUsageMsecs };
}
async function readContainerMetrics() {
const allAddons = [ 'turn', 'mail', 'mongodb', 'mysql', 'postgresql' ];
const containerNames = allAddons;
@@ -34,28 +56,14 @@ async function getDockerMetrics() {
const metrics = {};
for (const containerName of containerNames) {
const stats = await docker.getStats(containerName);
if (Object.keys(stats.memory_stats).length === 0) continue; // the container is stopped. better not to inspect and check State since a race is possible
const networkRead = stats.networks ? stats.networks.eth0.rx_bytes : 0; // in host mode (turn), networks is missing
const networkWrite = stats.networks ? stats.networks.eth0.tx_bytes : 0; // in host mode (turn), networks is missing
const memUsed = stats.memory_stats.usage;
const memMax = stats.memory_stats.limit;
const blkioStats = stats.blkio_stats.io_service_bytes_recursive;
const blockRead = blkioStats.filter(entry => entry.op === 'read').reduce((sum, entry) => sum + entry.value, 0);
const blockWrite = blkioStats.filter(entry => entry.op === 'write').reduce((sum, entry) => sum + entry.value, 0);
const cpuUsage = stats.cpu_stats.cpu_usage.total_usage / 1000; // convert to msecs (to match system metrics)
metrics[containerName] = { networkRead, networkWrite, blockRead, blockWrite, memUsed, memMax, cpuUsage };
const stats = await readContainerMetric(containerName);
if (stats) metrics[containerName] = stats;
}
return metrics;
}
async function getMemoryMetrics() {
async function readMemoryMetrics() {
const output = await fs.promises.readFile('/proc/meminfo', { encoding: 'utf8' });
const totalMemoryMatch = output.match(/^MemTotal:\s+(\d+)/m);
@@ -80,7 +88,7 @@ async function getMemoryMetrics() {
};
}
async function getCpuMetrics() {
async function readCpuMetrics() {
const cpus = os.cpus();
const userMsecs = cpus.map(c => c.times.user).reduce((p, c) => p+c);
const sysMsecs = cpus.map(c => c.times.sys).reduce((p, c) => p+c);
@@ -93,24 +101,24 @@ async function sendToGraphite() {
const graphiteMetrics = [];
const memoryMetrics = await getMemoryMetrics();
const memoryMetrics = await readMemoryMetrics();
graphiteMetrics.push({ path: `cloudron.system.memory-used`, value: memoryMetrics.memoryUsed });
graphiteMetrics.push({ path: `cloudron.system.swap-used`, value: memoryMetrics.swapUsed });
const cpuMetrics = await getCpuMetrics();
const cpuMetrics = await readCpuMetrics();
graphiteMetrics.push({ path: `cloudron.system.cpu-user`, value: cpuMetrics.userMsecs });
graphiteMetrics.push({ path: `cloudron.system.cpu-sys`, value: cpuMetrics.sysMsecs });
const dockerMetrics = await getDockerMetrics();
const dockerMetrics = await readContainerMetrics();
for (const [name, value] of Object.entries(dockerMetrics)) {
graphiteMetrics.push(
{ path: `cloudron.container-${name}.network-read`, value: value.networkRead },
{ path: `cloudron.container-${name}.network-write`, value: value.networkWrite },
{ path: `cloudron.container-${name}.blockio-read`, value: value.blockRead },
{ path: `cloudron.container-${name}.blockio-write`, value: value.blockWrite },
{ path: `cloudron.container-${name}.mem-used`, value: value.memUsed },
{ path: `cloudron.container-${name}.mem-max`, value: value.memMax },
{ path: `cloudron.container-${name}.cpu-usage`, value: value.cpuUsage },
{ path: `cloudron.container-${name}.memory-used`, value: value.memoryUsed },
{ path: `cloudron.container-${name}.memory-max`, value: value.memoryMax },
{ path: `cloudron.container-${name}.cpu-usage`, value: value.cpuUsageMsecs },
);
}
@@ -165,11 +173,15 @@ async function getContainer(name, options) {
// (cpu usage msecs) / (cpus * 1000) is the percent but over all cpus. times 100 is the percent.
// but the y-scale is cpus times 100. so, we only need to scale by 0.1
`scale(perSecond(cloudron.container-${name}.cpu-usage),0.1)`,
`summarize(cloudron.container-${name}.mem-used, "${intervalSecs}s", "avg")`,
`summarize(cloudron.container-${name}.blockio-read, "${intervalSecs}s", "sum")`,
`summarize(cloudron.container-${name}.blockio-write, "${intervalSecs}s", "sum")`,
`summarize(cloudron.container-${name}.network-read, "${intervalSecs}s", "sum")`,
`summarize(cloudron.container-${name}.network-write, "${intervalSecs}s", "sum")`,
`summarize(cloudron.container-${name}.memory-used, "${intervalSecs}s", "avg")`,
// get the rate in interval window
`summarize(perSecond(cloudron.container-${name}.blockio-read), "${intervalSecs}s", "avg")`,
`summarize(perSecond(cloudron.container-${name}.blockio-write), "${intervalSecs}s", "avg")`,
`summarize(perSecond(cloudron.container-${name}.network-read), "${intervalSecs}s", "avg")`,
`summarize(perSecond(cloudron.container-${name}.network-write), "${intervalSecs}s", "avg")`,
// just get the max in interval window for absolute numbers
`summarize(cloudron.container-${name}.blockio-read, "${intervalSecs}s", "max")`,
`summarize(cloudron.container-${name}.blockio-write, "${intervalSecs}s", "max")`,
`summarize(cloudron.container-${name}.network-read, "${intervalSecs}s", "max")`,
@@ -292,8 +304,11 @@ async function getSystemStream(options) {
});
intervalId = setInterval(async () => {
const memoryMetrics = await getMemoryMetrics();
const cpuMetrics = await getCpuMetrics();
const [memoryResult, cpuResult] = await Promise.allSettled([ readMemoryMetrics(), readCpuMetrics() ]); // never throws
if (memoryResult.status !== 'fulfilled' || cpuMetrics.status !== 'fulfilled') return metricsStream.destroy(memoryResult.reason || cpuMetrics.reason);
const memoryMetrics = memoryResult.value;
const cpuMetrics = cpuResult.value;
const cpuPercent = oldCpuMetrics ? (cpuMetrics.userMsecs + cpuMetrics.sysMsecs - oldCpuMetrics.userMsecs - oldCpuMetrics.sysMsecs) * 0.1 / (INTERVAL_MSECS/1000) : null;
oldCpuMetrics = cpuMetrics;
@@ -308,3 +323,54 @@ async function getSystemStream(options) {
return metricsStream;
}
/**
 * Creates a Readable stream that emits live metrics of a container.
 *
 * Every interval, one JSON string is pushed. Each metric is a [ value, timestampInSeconds ] pair:
 * cpu (percent), memory, blockRead/blockWrite/networkRead/networkWrite (per second rates) and the
 * matching *Total counters. Rates and cpu are null in the first sample (nothing to diff against).
 * cpuCount is os.cpus().length.
 *
 * @param {string} name - container name, passed to readContainerMetric()
 * @param {object} options - { intervalMsecs } sampling interval, defaults to 5000
 * @returns {Promise<Readable>} the metrics stream. destroying it stops the sampling timer
 */
async function getContainerStream(name, options) {
    assert.strictEqual(typeof name, 'string');
    assert.strictEqual(typeof options, 'object');

    const INTERVAL_MSECS = options.intervalMsecs || 5000;
    let intervalId = null;
    let oldMetrics = null; // previous sample, used to compute the deltas

    const metricsStream = new Readable({
        read(/*size*/) { /* ignored, we push via interval */ },
        destroy(error, callback) {
            clearInterval(intervalId);
            callback(error);
        }
    });

    intervalId = setInterval(async () => {
        const [error, metrics] = await safe(readContainerMetric(name));
        if (metricsStream.destroyed) return; // stream got destroyed while the read was in flight
        if (error) return metricsStream.destroy(error);

        // readContainerMetric() gives null for a stopped container. skip this tick instead of crashing on the destructuring below.
        // the baseline is dropped as well, so that the first sample after a restart does not diff against stale counters
        if (!metrics) {
            oldMetrics = null;
            return;
        }

        const { networkRead, networkWrite, blockRead, blockWrite, memoryUsed, cpuUsageMsecs } = metrics;

        const cpuPercent = oldMetrics ? (cpuUsageMsecs - oldMetrics.cpuUsageMsecs) * 0.1 / (INTERVAL_MSECS/1000) : null;
        const blockReadRate = oldMetrics ? (blockRead - oldMetrics.blockRead) / (INTERVAL_MSECS/1000) : null;
        const blockWriteRate = oldMetrics ? (blockWrite - oldMetrics.blockWrite) / (INTERVAL_MSECS/1000) : null;
        const networkReadRate = oldMetrics ? (networkRead - oldMetrics.networkRead) / (INTERVAL_MSECS/1000) : null;
        const networkWriteRate = oldMetrics ? (networkWrite - oldMetrics.networkWrite) / (INTERVAL_MSECS/1000) : null;

        oldMetrics = metrics;

        const now = Date.now() / 1000;
        metricsStream.push(JSON.stringify({
            cpu: [ cpuPercent, now ],
            memory: [ memoryUsed, now ],
            blockRead: [ blockReadRate, now ],
            blockWrite: [ blockWriteRate, now ],
            networkRead: [ networkReadRate, now ],
            networkWrite: [ networkWriteRate, now ],
            blockReadTotal: [ blockRead, now ],
            blockWriteTotal: [ blockWrite, now ],
            networkReadTotal: [ networkRead, now ],
            networkWriteTotal: [ networkWrite, now ],
            cpuCount: os.cpus().length
        }));
    }, INTERVAL_MSECS);

    return metricsStream;
}