'use strict';

exports = module.exports = {
    getSystem,
    getContainers,
    sendToGraphite
};

const apps = require('./apps.js'),
    assert = require('assert'),
    BoxError = require('./boxerror.js'),
    constants = require('./constants.js'),
    debug = require('debug')('box:metrics'),
    docker = require('./docker.js'),
    execSync = require('child_process').execSync,
    net = require('net'),
    os = require('os'),
    safe = require('safetydance'),
    services = require('./services.js'),
    superagent = require('./superagent.js');

// decimal (power of 10) multipliers, keyed by upper-cased unit
const SI_UNITS = Object.freeze({
    'B': 1,
    'KB': Math.pow(10, 3),
    'MB': Math.pow(10, 6),
    'GB': Math.pow(10, 9),
    'TB': Math.pow(10, 12),
    'PB': Math.pow(10, 15)
});

// binary (power of 2) multipliers, keyed by upper-cased unit
const BINARY_UNITS = Object.freeze({
    'B': 1,
    'KIB': Math.pow(2, 10),
    'MIB': Math.pow(2, 20),
    'GIB': Math.pow(2, 30),
    'TIB': Math.pow(2, 40),
    'PIB': Math.pow(2, 50)
});

/**
 * Converts a human readable size like "1.5kB" or "12 MiB" into a whole number of bytes.
 * The unit is matched case-insensitively against the keys of `units`.
 *
 * @param {string} size - number followed by a unit, optionally separated by whitespace
 * @param {Object<string, number>} units - upper-cased unit name to multiplier
 * @returns {number} bytes, rounded down
 * @throws {Error} if the string does not look like "<number><unit>" or the unit is not in `units`
 */
function parseSize(size, units) {
    const match = size.toUpperCase().match(/^(\d+(\.\d+)?)\s*(\D+)$/);
    if (!match) {
        throw new Error("Invalid size format");
    }

    const number = parseFloat(match[1]);
    const multiplier = units[match[3]];
    // without this check an unknown unit turns into NaN and ends up in graphite as a datapoint
    if (multiplier === undefined) {
        throw new Error(`Invalid size format: unknown unit in "${size}"`);
    }

    return Math.floor(number * multiplier);
}

// parses sizes with decimal units (B, kB, MB, GB, TB, PB) into bytes
function parseSiSize(size) {
    return parseSize(size, SI_UNITS);
}

// parses sizes with binary units (B, KiB, MiB, GiB, TiB, PiB) into bytes
function parseBinarySize(size) {
    return parseSize(size, BINARY_UNITS);
}

// splits a "left / right" value into its two trimmed halves
function splitPair(value) {
    const parts = value.split('/');
    return [ parts[0].trim(), parts[1].trim() ];
}

/**
 * Collects network, block io, memory and cpu metrics of the containers listed by `docker stats`.
 *
 * @returns {Promise<Array<{path: string, value: number}>>} seven metrics per container
 * @throws if docker cannot be executed or its output cannot be parsed
 */
async function getDockerMetrics() {
    const output = execSync('docker stats --format "{{ json . }}" --no-stream --no-trunc', { encoding: 'utf8' });

    // the output is empty when docker lists no containers. skip blank lines instead of feeding them to JSON.parse
    const lines = output.split('\n').map((line) => line.trim()).filter((line) => line.length !== 0);

    const metrics = [];

    for (const line of lines) {
        const stat = JSON.parse(line);
        const name = stat.Name; // appid or addon name

        const [netRead, netWrite] = splitPair(stat.NetIO);
        const [blockRead, blockWrite] = splitPair(stat.BlockIO);
        const [memUsed, memMax] = splitPair(stat.MemUsage);
        const cpuPerc = parseFloat(stat.CPUPerc.trim().replace('%', ''));

        metrics.push(
            { path: `cloudron.container-${name}.network-read`, value: parseSiSize(netRead) },
            { path: `cloudron.container-${name}.network-write`, value: parseSiSize(netWrite) },
            { path: `cloudron.container-${name}.blockio-read`, value: parseSiSize(blockRead) },
            { path: `cloudron.container-${name}.blockio-write`, value: parseSiSize(blockWrite) },
            { path: `cloudron.container-${name}.mem-used`, value: parseBinarySize(memUsed) },
            { path: `cloudron.container-${name}.mem-max`, value: parseBinarySize(memMax) },
            { path: `cloudron.container-${name}.cpu-perc`, value: cpuPerc }
        );
    }

    return metrics;
}

/**
 * Reads the used system memory (in bytes) from the `Mem:` row of `free`.
 *
 * @returns {Promise<Array<{path: string, value: number}>>}
 * @throws {BoxError} EXTERNAL_ERROR if the `Mem:` row cannot be found in the output
 */
async function getMemoryMetrics() {
    // we can also read /proc/meminfo but complicated to match the 'used' output of free
    const output = execSync('free --bytes --wide', { encoding: 'utf8' }).trim(); // --line is not in older ubuntu

    // columns of the wide output: total used free shared buffers cache available
    const memoryRe = /Mem:\s+(?<total>\d+)\s+(?<used>\d+)\s+(?<free>\d+)\s+(?<shared>\d+)\s+(?<buffers>\d+)\s+(?<cache>\d+)\s+(?<available>\d+)/;
    const memory = output.match(memoryRe);
    if (!memory) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Could not find memory used');

    return [
        { path: `cloudron.system.memory-used`, value: parseInt(memory.groups.used, 10) },
    ];
}

/**
 * Sums up the user and sys cpu times (milliseconds) of all cpus reported by os.cpus().
 *
 * @returns {Promise<Array<{path: string, value: number}>>}
 */
async function getCpuMetrics() {
    let userMillis = 0, sysMillis = 0;

    // starting from 0 keeps this working even if os.cpus() comes back empty
    for (const cpu of os.cpus()) {
        userMillis += cpu.times.user;
        sysMillis += cpu.times.sys;
    }

    // this value is time spent since system start
    return [
        { path: `cloudron.system.cpu-user`, value: userMillis },
        { path: `cloudron.system.cpu-sys`, value: sysMillis },
    ];
}

/**
 * Collects docker, memory and cpu metrics and writes them to the graphite port on 127.0.0.1,
 * one "<path> <value> <timestamp>" line per metric.
 *
 * Collection errors are thrown. Socket errors are only logged and the promise still resolves.
 *
 * @returns {Promise<void>}
 */
async function sendToGraphite() {
    debug('sendStatsToGraphite: collecting stats');

    const dockerMetrics = await getDockerMetrics();
    const memoryMetrics = await getMemoryMetrics();
    const cpuMetrics = await getCpuMetrics();
    const allMetrics = [...dockerMetrics, ...memoryMetrics, ...cpuMetrics];

    return new Promise((resolve) => {
        const client = new net.Socket();

        client.on('error', (error) => {
            debug(error);
            resolve();
        });

        client.on('end', () => {
            debug('sent to graphite');
            resolve();
        });

        client.connect(constants.GRAPHITE_PORT, '127.0.0.1', () => {
            debug('connected to graphite');

            const now = Math.floor(Date.now() / 1000); // timestamp in seconds

            for (const metric of allMetrics) {
                client.write(`${metric.path} ${metric.value} ${now}\n`);
            }

            client.end();
        });
    });
}

// for testing locally: curl 'http://${graphite-ip}:8000/graphite-web/render?format=json&from=-1min&target=absolute(collectd.localhost.du-docker.capacity-usage)'
// the datapoint is (value, timestamp) https://graphite.readthedocs.io/en/latest/

/**
 * Resolves the render endpoint of the graphite container from its IP in the cloudron network.
 *
 * @returns {Promise<string>} url of the graphite-web render api
 * @throws {BoxError} INACTIVE if the container does not exist or has no IP. other inspect errors are rethrown
 */
async function getGraphiteUrl() {
    const [error, result] = await safe(docker.inspect('graphite'));
    // callers pass the return value straight to superagent.get(), so a missing container has to be an error and not a status object
    if (error && error.reason === BoxError.NOT_FOUND) throw new BoxError(BoxError.INACTIVE, 'graphite service is not running');
    if (error) throw error;

    const ip = safe.query(result, 'NetworkSettings.Networks.cloudron.IPAddress', null);
    if (!ip) throw new BoxError(BoxError.INACTIVE, 'Error getting IP of graphite service');

    return `http://${ip}:8000/graphite-web/render`;
}

/**
 * Runs the render targets against graphite one after the other.
 *
 * @param {string} graphiteUrl - render endpoint
 * @param {Array<string>} targets - graphite target expressions
 * @param {object} options - { fromSecs, noNullPoints }
 * @returns {Promise<Array<Array>>} datapoints of the first series of each target, in target order. [] when there is no series
 * @throws {BoxError} NETWORK_ERROR if the request fails, EXTERNAL_ERROR on a non-200 response
 */
async function queryGraphite(graphiteUrl, targets, options) {
    const { fromSecs, noNullPoints } = options;

    const results = [];

    for (const target of targets) {
        const query = {
            target: target,
            format: 'json',
            from: `-${fromSecs}s`,
            until: 'now',
            noNullPoints: !!noNullPoints
        };

        // ok(() => true) hands every status code back to us so that it can be reported below
        const [error, response] = await safe(superagent.get(graphiteUrl).query(query).timeout(30 * 1000).ok(() => true));
        if (error) throw new BoxError(BoxError.NETWORK_ERROR, error);
        if (response.status !== 200) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unknown error with ${target}: ${response.status} ${response.text}`);

        const series = Array.isArray(response.body) ? response.body[0] : null;
        results.push(series && series.datapoints ? series.datapoints : []);
    }

    return results;
}

// value of the first datapoint or 0 when there is no such datapoint (or the value is null)
function firstValue(datapoints) {
    return datapoints[0] && datapoints[0][0] ? datapoints[0][0] : 0;
}

/**
 * Gets the cpu, memory, block io and network graphs of a single container.
 *
 * @param {string} name - container name as used in the metric path (app id or service id)
 * @param {object} options - { fromSecs, intervalSecs, noNullPoints }
 * @returns {Promise<object>} datapoint lists plus the *Total values and cpuCount
 * @throws {BoxError} if graphite cannot be located or queried
 */
async function getContainers(name, options) {
    assert.strictEqual(typeof name, 'string');
    assert.strictEqual(typeof options, 'object');

    const { intervalSecs } = options;
    const graphiteUrl = await getGraphiteUrl();
    const prefix = `cloudron.container-${name}`;

    // the order matters, the results are picked by index below
    const targets = [
        `summarize(${prefix}.cpu-perc, "${intervalSecs}s", "avg")`,
        `summarize(${prefix}.mem-used, "${intervalSecs}s", "avg")`,
        `summarize(${prefix}.blockio-read, "${intervalSecs}s", "sum")`,
        `summarize(${prefix}.blockio-write, "${intervalSecs}s", "sum")`,
        `summarize(${prefix}.network-read, "${intervalSecs}s", "sum")`,
        `summarize(${prefix}.network-write, "${intervalSecs}s", "sum")`,
        `summarize(${prefix}.blockio-read, "${intervalSecs}s", "max")`,
        `summarize(${prefix}.blockio-write, "${intervalSecs}s", "max")`,
        `summarize(${prefix}.network-read, "${intervalSecs}s", "max")`,
        `summarize(${prefix}.network-write, "${intervalSecs}s", "max")`,
    ];

    const results = await queryGraphite(graphiteUrl, targets, options);

    // results are datapoints[[value, ts], [value, ts], ...];
    return {
        cpu: results[0],
        memory: results[1],
        blockRead: results[2],
        blockWrite: results[3],
        networkRead: results[4],
        networkWrite: results[5],
        blockReadTotal: firstValue(results[6]),
        blockWriteTotal: firstValue(results[7]),
        networkReadTotal: firstValue(results[8]),
        networkWriteTotal: firstValue(results[9]),
        cpuCount: os.cpus().length
    };
}

/**
 * Gets the system wide cpu and memory graphs.
 *
 * @param {object} options - { fromSecs, intervalSecs, noNullPoints }
 * @returns {Promise<{cpu: Array, memory: Array}>}
 * @throws {BoxError} if graphite cannot be located or queried
 */
async function getSystemStats(options) {
    assert.strictEqual(typeof options, 'object');

    const { intervalSecs } = options;
    const graphiteUrl = await getGraphiteUrl();

    // example: curl 'http://172.18.30.5:8000/graphite-web/render?target=cloudron.system.cpu-user&target=cloudron.system.cpu-sys&format=json&from=-1min&until=now&noNullPoints=false' | python3 -m json.tool
    const targets = [
        // perSecond is nonNegativeDerivative over time . this value is the cpu usage in msecs .
        // (cpu usage msecs) / (cpus * 1000) is the percent but over all cpus. times 100 is the percent.
        // but the y-scale is cpus times 100. so, we only need to scale by 0.1
        `scale(perSecond(sumSeries(cloudron.system.cpu-user,cloudron.system.cpu-sys)),0.1)`,
        `summarize(cloudron.system.memory-used, "${intervalSecs}s", "avg")`
    ];

    const results = await queryGraphite(graphiteUrl, targets, options);

    return { cpu: results[0], memory: results[1] };
}

/**
 * Gets the system graphs together with the graphs of every app and every service.
 *
 * @param {object} options - { fromSecs, intervalSecs, noNullPoints }
 * @returns {Promise<object>} { cpu, memory, apps, services, cpuCount } where apps and services are keyed by id
 */
async function getSystem(options) {
    assert.strictEqual(typeof options, 'object');

    const systemStats = await getSystemStats(options);

    const appStats = {};
    for (const app of await apps.list()) {
        appStats[app.id] = await getContainers(app.id, options);
    }

    const serviceStats = {};
    for (const serviceId of await services.listServices()) {
        serviceStats[serviceId] = await getContainers(serviceId, options);
    }

    return {
        cpu: systemStats.cpu,
        memory: systemStats.memory,
        apps: appStats,
        services: serviceStats,
        cpuCount: os.cpus().length
    };
}