diff --git a/src/metrics.js b/src/metrics.js
index 49785db2e..b9c5a9ca1 100644
--- a/src/metrics.js
+++ b/src/metrics.js
@@ -28,6 +28,9 @@ const apps = require('./apps.js'),
 function translateContainerStatsSync(stats) {
     assert.strictEqual(typeof stats, 'object');
 
+    // the container is missing or stopped (empty pids/memory stats or null blkio counters). better not to inspect and check State since a race is possible
+    if (Object.keys(stats.pids_stats).length === 0 || Object.keys(stats.memory_stats).length === 0 || stats.blkio_stats.io_service_bytes_recursive === null) return null;
+
     const networkRead = stats.networks ? stats.networks.eth0.rx_bytes : 0; // in host mode (turn), networks is missing
     const networkWrite = stats.networks ? stats.networks.eth0.tx_bytes : 0; // in host mode (turn), networks is missing
 
@@ -41,7 +44,9 @@ function translateContainerStatsSync(stats) {
     const cpuUsageMsecs = stats.cpu_stats.cpu_usage.total_usage / 1e6; // convert from nano to msecs (to match system metrics)
     const systemUsageMsecs = stats.cpu_stats.system_cpu_usage / 1e6;
 
-    return { ts: new Date(stats.read), networkRead, networkWrite, blockRead, blockWrite, memoryUsed, memoryMax, cpuUsageMsecs, systemUsageMsecs };
+    const pidCount = stats.pids_stats.current;
+
+    return { ts: new Date(stats.read), pidCount, networkRead, networkWrite, blockRead, blockWrite, memoryUsed, memoryMax, cpuUsageMsecs, systemUsageMsecs };
 }
 
 async function readContainerMetrics() {
@@ -56,9 +61,10 @@ async function readContainerMetrics() {
     const metrics = {};
     for (const containerName of containerNames) {
         const [error, stats] = await safe(docker.getStats(containerName, { stream: false }));
-        if (error || Object.keys(stats.memory_stats).length === 0) continue; // the container is missing or stopped. better not to inspect and check State since a race is possible
+        if (error) continue;
 
-        metrics[containerName] = translateContainerStatsSync(stats);
+        const translated = translateContainerStatsSync(stats);
+        if (translated) metrics[containerName] = translated;
     }
 
     return metrics;
@@ -349,6 +355,7 @@ async function readSystemFromGraphite(options) {
 }
 
 // CPU: stress --cpu 2 --timeout 60
+// Memory: stress --vm 2 --vm-bytes 256M --timeout 60
 async function getSystem(options) {
     assert.strictEqual(typeof options, 'object');
 
@@ -439,6 +446,7 @@ async function getContainerStream(name, options) {
     statsStream.on('data', (data) => {
         const stats = JSON.parse(data.toString('utf8'));
         const metrics = translateContainerStatsSync(stats);
+        if (!metrics) return; // maybe the container stopped
 
         const { ts, networkRead, networkWrite, blockRead, blockWrite, memoryUsed, cpuUsageMsecs } = metrics;
 
@@ -452,7 +460,7 @@ async function getContainerStream(name, options) {
 
         oldMetrics = metrics;
 
-        const nowSecs = ts.getTime() / 1000; // to match graphite return value
+        const nowSecs = ts.getTime() / 1000; // convert to secs to match graphite return value
         metricsStream.push(JSON.stringify({
             cpu: [ cpuPercent, nowSecs ],
             memory: [ memoryUsed, nowSecs ],