2022-09-14 13:03:14 +02:00
'use strict';

// Public API: one-shot metrics fetch from graphite (get), live metrics
// streaming (getStream) and the periodic graphite exporter (sendToGraphite).
exports = module.exports = {
    get,
    getStream,

    sendToGraphite
};
2022-10-12 22:08:10 +02:00
const apps = require ( './apps.js' ) ,
assert = require ( 'assert' ) ,
2022-09-14 13:03:14 +02:00
BoxError = require ( './boxerror.js' ) ,
2025-05-21 16:32:52 +02:00
constants = require ( './constants.js' ) ,
debug = require ( 'debug' ) ( 'box:metrics' ) ,
2022-10-11 12:44:37 +02:00
docker = require ( './docker.js' ) ,
2025-05-23 16:11:48 +02:00
fs = require ( 'fs' ) ,
2025-05-21 16:32:52 +02:00
net = require ( 'net' ) ,
2022-10-13 22:36:20 +02:00
os = require ( 'os' ) ,
2025-07-01 15:43:49 +02:00
path = require ( 'path' ) ,
2025-05-21 17:15:04 +02:00
{ Readable } = require ( 'stream' ) ,
2022-09-14 13:03:14 +02:00
safe = require ( 'safetydance' ) ,
2025-02-14 17:26:54 +01:00
superagent = require ( './superagent.js' ) ;
2022-09-14 13:03:14 +02:00
2025-07-03 19:01:40 +02:00
// Translates a raw docker stats object into our flat metrics record.
// Returns null when the stats are partial (container missing or stopped).
function translateContainerStatsSync(stats) {
    assert.strictEqual(typeof stats, 'object');

    // the container is missing or stopped. better not to inspect and check State since a race is possible
    // note: previously compared Object.keys(stats.pids_stats) (an array) to 0 which is always false; use .length
    if (Object.keys(stats.pids_stats).length === 0 || Object.keys(stats.memory_stats).length === 0 || stats.blkio_stats.io_service_bytes_recursive === null) return null;

    const networkRead = stats.networks?.eth0?.rx_bytes ?? 0; // in host mode (turn), networks is missing
    const networkWrite = stats.networks?.eth0?.tx_bytes ?? 0; // in host mode (turn), networks is missing

    const memoryUsed = stats.memory_stats.usage;
    const memoryMax = stats.memory_stats.limit;

    const blkioStats = stats.blkio_stats.io_service_bytes_recursive;
    const blockRead = blkioStats.filter(entry => entry.op === 'read').reduce((sum, entry) => sum + entry.value, 0);
    const blockWrite = blkioStats.filter(entry => entry.op === 'write').reduce((sum, entry) => sum + entry.value, 0);

    const cpuUsageMsecs = stats.cpu_stats.cpu_usage.total_usage / 1e6; // convert from nano to msecs (to match system metrics)
    const systemUsageMsecs = stats.cpu_stats.system_cpu_usage / 1e6;

    const pidCount = stats.pids_stats.current;

    return { ts: new Date(stats.read), pidCount, networkRead, networkWrite, blockRead, blockWrite, memoryUsed, memoryMax, cpuUsageMsecs, systemUsageMsecs };
}
// Collects a current stats snapshot of all addon and app containers.
// Returns an object keyed by a stable container name (addon name, app id or redis-<appId>).
// Stopped/missing containers are skipped silently.
async function readContainerMetrics() {
    const allAddons = [ 'turn', 'mail', 'mongodb', 'mysql', 'postgresql' ];

    const containerNames = [ ...allAddons ]; // copy so we do not mutate allAddons
    for (const app of await apps.list()) {
        if (app.containerId) containerNames.push(app.id); // containerId can be null if app is installing. metrics must be stored by appId since container id changes over time
        if (app.manifest.addons?.redis && app.enableRedis) containerNames.push(`redis-${app.id}`);
    }

    const metrics = {};
    for (const containerName of containerNames) {
        const [error, stats] = await safe(docker.getStats(containerName, { stream: false }));
        if (error) continue; // container is likely stopped or missing

        const translated = translateContainerStatsSync(stats);
        if (translated) metrics[containerName] = translated;
    }

    return metrics;
}
2025-07-01 11:49:37 +02:00
// Reads used memory and swap (in bytes) from /proc/meminfo.
// throws BoxError.EXTERNAL_ERROR when the expected fields are missing
async function readMemoryMetrics() {
    const output = await fs.promises.readFile('/proc/meminfo', { encoding: 'utf8' });

    // /proc/meminfo values are in KiB; returns bytes or null when the field is missing
    const fieldBytes = (name) => {
        const match = output.match(new RegExp(`^${name}:\\s+(\\d+)`, 'm'));
        return match ? parseInt(match[1], 10) * 1024 : null;
    };

    const totalMemory = fieldBytes('MemTotal');
    const freeMemory = fieldBytes('MemFree');
    const buffers = fieldBytes('Buffers');
    const cached = fieldBytes('Cached');
    if (totalMemory === null || freeMemory === null || buffers === null || cached === null) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Could not find memory used');
    const memoryUsed = totalMemory - freeMemory - buffers - cached;

    const swapTotal = fieldBytes('SwapTotal');
    const swapFree = fieldBytes('SwapFree');
    if (swapTotal === null || swapFree === null) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Could not find swap used');
    const swapUsed = swapTotal - swapFree;

    return {
        memoryUsed,
        swapUsed
    };
}
2025-07-01 11:49:37 +02:00
// Returns the cumulative user and system cpu time (msecs) summed over all
// cores since system start. Rates are derived by callers from two samples.
async function readCpuMetrics() {
    let userMsecs = 0;
    let sysMsecs = 0;

    for (const cpu of os.cpus()) {
        userMsecs += cpu.times.user;
        sysMsecs += cpu.times.sys;
    }

    return { userMsecs, sysMsecs }; // these values are the times spent since system start
}
2025-07-01 15:43:49 +02:00
// Reads cumulative bytes read/written of the root block device from /proc/diskstats.
// throws BoxError.EXTERNAL_ERROR when the device or its stats line cannot be located
async function readDiskMetrics() {
    const mounts = await fs.promises.readFile('/proc/mounts', { encoding: 'utf8' });
    const rootLine = mounts.split('\n').find(line => line.split(' ')[1] === '/');
    if (!rootLine) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Root mount not found');

    const devicePath = rootLine.split(' ')[0]; // e.g., "/dev/sda1"
    const base = path.basename(devicePath); // remove /dev/
    const match = base.match(/^(.*?)(p?[0-9]+)?$/); // strip partition suffix, e.g. "sda1" -> "sda", "nvme0n1p2" -> "nvme0n1"
    const blockDevice = match ? match[1] : base;
    if (!blockDevice) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Could not find root block device name');

    const diskstats = await fs.promises.readFile('/proc/diskstats', { encoding: 'utf8' });
    const statsLine = diskstats.split('\n').find(l => l.includes(` ${blockDevice} `));
    if (!statsLine) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Could not get disk stats'); // was re-checking blockDevice, which let a missing line crash on trim() below
    const parts = statsLine.trim().split(/\s+/);
    const sectorsRead = parseInt(parts[5], 10); // field 6. one sector is 512 bytes
    const sectorsWrite = parseInt(parts[9], 10); // field 10
    const blockRead = sectorsRead * 512;
    const blockWrite = sectorsWrite * 512;
    return { blockRead, blockWrite };
}
// Reads cumulative rx/tx bytes of the default-route network interface.
// throws BoxError.EXTERNAL_ERROR when no default route exists
async function readNetworkMetrics() {
    const contents = await fs.promises.readFile('/proc/net/route', { encoding: 'utf8' });
    const routeLines = contents.trim().split('\n').slice(1); // first line is the header

    // the default route has destination 00000000
    const defaultRoute = routeLines.find(line => line.split(/\s+/)[1] === '00000000');
    if (!defaultRoute) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Could not detect default interface');
    const defaultIface = defaultRoute.split(/\s+/)[0];

    const [rx, tx] = await Promise.all(['rx_bytes', 'tx_bytes'].map(stat =>
        fs.promises.readFile(`/sys/class/net/${defaultIface}/statistics/${stat}`, { encoding: 'utf8' })
    ));

    return {
        networkRead: parseInt(rx.trim(), 10),
        networkWrite: parseInt(tx.trim(), 10)
    };
}
// Gathers all host level metrics into a single flat object.
// { memoryUsed, swapUsed, userMsecs, sysMsecs, blockRead, blockWrite, networkRead, networkWrite }
async function readSystemMetrics() {
    // the four readers are independent of each other, so run them in parallel
    const [memoryMetrics, cpuMetrics, diskMetrics, networkMetrics] = await Promise.all([
        readMemoryMetrics(),
        readCpuMetrics(),
        readDiskMetrics(),
        readNetworkMetrics()
    ]);

    return { ...memoryMetrics, ...cpuMetrics, ...diskMetrics, ...networkMetrics };
}
// Collects system and container metrics and pushes them to the local graphite
// service over the plaintext protocol (one "<path> <value> <timestamp>\n" line
// per metric). Best-effort: socket errors are only logged and the returned
// promise always resolves, never rejects.
async function sendToGraphite() {
    // debug('sendStatsToGraphite: collecting stats');
    const result = await readSystemMetrics();
    const graphiteMetrics = [
        { path: `cloudron.system.memory-used`, value: result.memoryUsed },
        { path: `cloudron.system.swap-used`, value: result.swapUsed },
        { path: `cloudron.system.cpu-user`, value: result.userMsecs },
        { path: `cloudron.system.cpu-sys`, value: result.sysMsecs },
        { path: `cloudron.system.blockio-read`, value: result.blockRead },
        { path: `cloudron.system.blockio-write`, value: result.blockWrite },
        { path: `cloudron.system.network-read`, value: result.networkRead },
        { path: `cloudron.system.network-write`, value: result.networkWrite }
    ];

    const dockerMetrics = await readContainerMetrics();

    // one metric series per container, keyed by the stable container name
    for (const [name, value] of Object.entries(dockerMetrics)) {
        graphiteMetrics.push(
            { path: `cloudron.container-${name}.network-read`, value: value.networkRead },
            { path: `cloudron.container-${name}.network-write`, value: value.networkWrite },
            { path: `cloudron.container-${name}.blockio-read`, value: value.blockRead },
            { path: `cloudron.container-${name}.blockio-write`, value: value.blockWrite },
            { path: `cloudron.container-${name}.memory-used`, value: value.memoryUsed },
            { path: `cloudron.container-${name}.memory-max`, value: value.memoryMax },
            { path: `cloudron.container-${name}.cpu-usage`, value: value.cpuUsageMsecs },
        );
    }

    return new Promise((resolve) => {
        const client = new net.Socket();
        client.connect(constants.GRAPHITE_PORT, '127.0.0.1', () => {
            debug('connected to graphite');

            const now = Math.floor(Date.now() / 1000); // graphite timestamps are in seconds

            for (const metric of graphiteMetrics) {
                client.write(`${metric.path} ${metric.value} ${now}\n`);
            }

            client.end(); // flushes pending writes and triggers 'end' below
        });

        client.on('error', (error) => {
            debug(error);
            resolve(); // best-effort: swallow the error so callers never reject
        });

        client.on('end', () => {
            debug('sent to graphite');
            resolve();
        });
    });
}
2022-10-11 12:44:37 +02:00
// for testing locally: curl 'http://${graphite-ip}:8000/graphite-web/render?format=json&from=-1min&target=absolute(collectd.localhost.du-docker.capacity-usage)'
2022-10-11 19:06:26 +02:00
// the datapoint is (value, timestamp) https://graphite.readthedocs.io/en/latest/
2022-10-11 12:44:37 +02:00
// Resolves the base render URL of the graphite container.
// throws the docker inspect error (including NOT_FOUND) or BoxError.INACTIVE when graphite has no IP
async function getGraphiteUrl() {
    const [error, result] = await safe(docker.inspect('graphite'));
    // note: this used to return { status: exports.SERVICE_STATUS_STOPPED } on NOT_FOUND, but that
    // constant is not defined in this module and callers expect a URL string. propagate instead.
    if (error) throw error;

    const ip = safe.query(result, 'NetworkSettings.Networks.cloudron.IPAddress', null);
    if (!ip) throw new BoxError(BoxError.INACTIVE, 'Error getting IP of graphite service');

    return `http://${ip}:8000/graphite-web/render`;
}
2022-09-14 13:03:14 +02:00
2025-07-01 09:46:24 +02:00
// Fetches the graphite timeseries of a single container (app or service).
// Series values are arrays of [value, ts] datapoints; *Total fields are plain numbers.
async function getContainer(name, options) {
    assert.strictEqual(typeof name, 'string');
    assert.strictEqual(typeof options, 'object');

    const { fromSecs, intervalSecs, noNullPoints } = options;

    const graphiteUrl = await getGraphiteUrl();

    const prefix = `cloudron.container-${name}`;
    const targets = [
        // perSecond is nonNegativeDerivative over time . this value is the cpu usage in msecs .
        // (cpu usage msecs) / (cpus * 1000) is the percent but over all cpus. times 100 is the percent.
        // but the y-scale is cpus times 100. so, we only need to scale by 0.1
        `scale(perSecond(${prefix}.cpu-usage),0.1)`,
        `summarize(${prefix}.memory-used, "${intervalSecs}s", "avg")`,
        // get the rate in interval window
        `summarize(perSecond(${prefix}.blockio-read), "${intervalSecs}s", "avg")`,
        `summarize(perSecond(${prefix}.blockio-write), "${intervalSecs}s", "avg")`,
        `summarize(perSecond(${prefix}.network-read), "${intervalSecs}s", "avg")`,
        `summarize(perSecond(${prefix}.network-write), "${intervalSecs}s", "avg")`,
        // just get the max in interval window for absolute numbers
        `summarize(${prefix}.blockio-read, "${intervalSecs}s", "max")`,
        `summarize(${prefix}.blockio-write, "${intervalSecs}s", "max")`,
        `summarize(${prefix}.network-read, "${intervalSecs}s", "max")`,
        `summarize(${prefix}.network-write, "${intervalSecs}s", "max")`,
    ];

    const series = [];

    for (const target of targets) {
        const query = {
            target,
            format: 'json',
            from: `-${fromSecs}s`,
            until: 'now+20s', // until is exclusive. 'now' is otherwise not included
            noNullPoints: !!noNullPoints
        };

        const [error, response] = await safe(superagent.get(graphiteUrl).query(query).timeout(30 * 1000).ok(() => true));
        if (error) throw new BoxError(BoxError.NETWORK_ERROR, error);
        if (response.status !== 200) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unknown error with ${target}: ${response.status} ${response.text}`);

        series.push(response.body[0]?.datapoints ?? []);
    }

    // first datapoint's value or 0 when the series is empty
    const firstValue = (datapoints) => datapoints[0]?.[0] || 0;

    // results are datapoints[[value, ts], [value, ts], ...];
    return {
        cpu: series[0],
        memory: series[1],
        blockReadRate: series[2],
        blockWriteRate: series[3],
        networkReadRate: series[4],
        networkWriteRate: series[5],
        blockReadTotal: firstValue(series[6]),
        blockWriteTotal: firstValue(series[7]),
        networkReadTotal: firstValue(series[8]),
        networkWriteTotal: firstValue(series[9]),
    };
}
2022-09-15 11:28:41 +02:00
2025-05-21 17:15:04 +02:00
// Fetches the host level timeseries from graphite.
// Series values are arrays of [value, ts] datapoints; *Total fields are plain numbers.
async function readSystemFromGraphite(options) {
    assert.strictEqual(typeof options, 'object');

    const { fromSecs, intervalSecs, noNullPoints } = options;

    const graphiteUrl = await getGraphiteUrl();

    // example: curl 'http://172.18.30.5:8000/graphite-web/render?target=cloudron.system.cpu-user&target=cloudron.system.cpu-sys&format=json&from=-1min&until=now&noNullPoints=false' | python3 -m json.tool
    const targets = [
        // perSecond is nonNegativeDerivative over time . this value is the cpu usage in msecs .
        // (cpu usage msecs) / (cpus * 1000) is the percent but over all cpus. times 100 is the percent.
        // but the y-scale is cpus times 100. so, we only need to scale by 0.1
        `scale(perSecond(sumSeries(cloudron.system.cpu-user,cloudron.system.cpu-sys)),0.1)`,
        `summarize(cloudron.system.memory-used, "${intervalSecs}s", "avg")`,
        `summarize(cloudron.system.swap-used, "${intervalSecs}s", "avg")`,
        // get the rate in interval window
        `summarize(perSecond(cloudron.system.blockio-read), "${intervalSecs}s", "avg")`,
        `summarize(perSecond(cloudron.system.blockio-write), "${intervalSecs}s", "avg")`,
        `summarize(perSecond(cloudron.system.network-read), "${intervalSecs}s", "avg")`,
        `summarize(perSecond(cloudron.system.network-write), "${intervalSecs}s", "avg")`,
        // just get the max in interval window for absolute numbers
        `summarize(cloudron.system.blockio-read, "${intervalSecs}s", "max")`,
        `summarize(cloudron.system.blockio-write, "${intervalSecs}s", "max")`,
        `summarize(cloudron.system.network-read, "${intervalSecs}s", "max")`,
        `summarize(cloudron.system.network-write, "${intervalSecs}s", "max")`,
    ];

    const series = [];

    for (const target of targets) {
        const query = {
            target,
            format: 'json',
            from: `-${fromSecs}s`,
            until: 'now',
            noNullPoints: !!noNullPoints
        };

        const [error, response] = await safe(superagent.get(graphiteUrl).query(query).timeout(30 * 1000).ok(() => true));
        if (error) throw new BoxError(BoxError.NETWORK_ERROR, error);
        if (response.status !== 200) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unknown error with ${target}: ${response.status} ${response.text}`);

        series.push(response.body[0]?.datapoints ?? []);
    }

    // first datapoint's value or 0 when the series is empty
    const firstValue = (datapoints) => datapoints[0]?.[0] || 0;

    return {
        cpu: series[0],
        memory: series[1],
        swap: series[2],
        blockReadRate: series[3],
        blockWriteRate: series[4],
        networkReadRate: series[5],
        networkWriteRate: series[6],
        blockReadTotal: firstValue(series[7]),
        blockWriteTotal: firstValue(series[8]),
        networkReadTotal: firstValue(series[9]),
        networkWriteTotal: firstValue(series[10]),
    };
}
2025-07-03 17:31:49 +02:00
// CPU: stress --cpu 2 --timeout 60
2025-07-04 12:51:51 +02:00
// Memory: stress --vm 2 --vm-bytes 256M
2025-07-04 13:18:23 +02:00
// Network:
// raw stats: ip -s link show eth0
// testing: curl -o /dev/null https://ash-speed.hetzner.com/10GB.bin and then use nethogs eth0 (cycle with 'm')
2025-07-04 14:26:09 +02:00
// Disk:
// writing: fio --name=rate-test --filename=tempfile --rw=write --bs=4k --ioengine=libaio --rate=20M --size=5000M --runtime=150 --direct=1. test with iotop
// reading: fio --name=rate-test --filename=tempfile --rw=read --bs=4k --ioengine=libaio --rate=20M --size=5000M --runtime=150 --direct=1. test with iotop
2025-07-07 15:53:09 +02:00
// One-shot metrics fetch from graphite for the host (when options.system is set)
// and for each requested app and service container.
async function get(options) {
    assert.strictEqual(typeof options, 'object');

    const result = {};

    if (options.system) result.system = await readSystemFromGraphite(options);

    // apps and services are both keyed by their id in the result
    for (const id of [ ...options.appIds, ...options.serviceIds ]) {
        result[id] = await getContainer(id, options);
    }

    return result;
}
2025-05-21 17:15:04 +02:00
2025-07-07 15:53:09 +02:00
// Streams live docker stats of one container into metricsStream until ac aborts.
// Pushes { [name]: { cpu, memory, rates and totals } } objects; rates are null
// until a second sample makes a delta possible.
async function pipeContainerMetrics(name, metricsStream, ac) {
    assert.strictEqual(typeof name, 'string');
    assert.strictEqual(typeof metricsStream, 'object');
    assert.strictEqual(typeof ac, 'object');

    let oldMetrics = null;

    // we used to poll before instead of a stream. but docker caches metrics internally and rate logic has to handle dups
    const [error, statsStream] = await safe(docker.getStats(name, { stream: true }));
    if (error) return; // container stopped or missing, silently ignore

    // an 'abort' listener added after the signal already aborted never fires; close immediately instead
    if (ac.signal.aborted) return statsStream.destroy(ac.signal.reason);

    statsStream.on('error', (error) => debug(error));
    statsStream.on('data', (data) => {
        let stats;
        try {
            stats = JSON.parse(data.toString('utf8'));
        } catch (parseError) { // a chunk may not be a complete JSON document; skip it instead of crashing
            debug(parseError);
            return;
        }

        const metrics = translateContainerStatsSync(stats);
        if (!metrics) return; // maybe the container stopped

        const { ts, networkRead, networkWrite, blockRead, blockWrite, memoryUsed, cpuUsageMsecs } = metrics;

        const gap = oldMetrics ? (ts - oldMetrics.ts) : null; // msecs between the two samples
        const cpuPercent = oldMetrics ? (cpuUsageMsecs - oldMetrics.cpuUsageMsecs) * 100 / gap : null;
        const blockReadRate = oldMetrics ? (blockRead - oldMetrics.blockRead) / (gap / 1000) : null;
        const blockWriteRate = oldMetrics ? (blockWrite - oldMetrics.blockWrite) / (gap / 1000) : null;
        const networkReadRate = oldMetrics ? (networkRead - oldMetrics.networkRead) / (gap / 1000) : null;
        const networkWriteRate = oldMetrics ? (networkWrite - oldMetrics.networkWrite) / (gap / 1000) : null;

        oldMetrics = metrics;

        const nowSecs = ts.getTime() / 1000; // convert to secs to match graphite return value

        const result = {};
        result[name] = {
            cpu: [cpuPercent, nowSecs],
            memory: [memoryUsed, nowSecs],
            blockReadRate: [blockReadRate, nowSecs],
            blockWriteRate: [blockWriteRate, nowSecs],
            blockReadTotal: metrics.blockRead,
            blockWriteTotal: metrics.blockWrite,
            networkReadRate: [networkReadRate, nowSecs],
            networkWriteRate: [networkWriteRate, nowSecs],
            networkReadTotal: metrics.networkRead,
            networkWriteTotal: metrics.networkWrite,
        };

        metricsStream.push(result);
    });

    ac.signal.addEventListener('abort', () => { // there is event.type and ac.signal.reason
        statsStream.destroy(ac.signal.reason);
    }, { once: true });
}
// Samples host metrics once a second and pushes { system: {...} } objects into
// metricsStream until ac aborts. Rates are null for the very first sample since
// a delta needs two samples.
async function pipeSystemMetrics(metricsStream, ac) {
    assert.strictEqual(typeof metricsStream, 'object');
    assert.strictEqual(typeof ac, 'object');

    const INTERVAL_MSECS = 1000;

    let previous = null;

    const intervalId = setInterval(async () => {
        const [error, metrics] = await safe(readSystemMetrics());
        if (error) return metricsStream.destroy(error); // destroy aborts ac which clears this interval

        const intervalSecs = INTERVAL_MSECS / 1000;
        const cpuPercent = previous === null ? null : (metrics.userMsecs + metrics.sysMsecs - previous.userMsecs - previous.sysMsecs) * 100 / INTERVAL_MSECS;
        const blockReadRate = previous === null ? null : (metrics.blockRead - previous.blockRead) / intervalSecs;
        const blockWriteRate = previous === null ? null : (metrics.blockWrite - previous.blockWrite) / intervalSecs;
        const networkReadRate = previous === null ? null : (metrics.networkRead - previous.networkRead) / intervalSecs;
        const networkWriteRate = previous === null ? null : (metrics.networkWrite - previous.networkWrite) / intervalSecs;

        previous = metrics;

        const nowSecs = Date.now() / 1000; // to match graphite return value

        metricsStream.push({
            system: {
                cpu: [cpuPercent, nowSecs],
                memory: [metrics.memoryUsed, nowSecs],
                swap: [metrics.swapUsed, nowSecs],
                blockReadRate: [blockReadRate, nowSecs],
                blockWriteRate: [blockWriteRate, nowSecs],
                blockReadTotal: metrics.blockRead,
                blockWriteTotal: metrics.blockWrite,
                networkReadRate: [networkReadRate, nowSecs],
                networkWriteRate: [networkWriteRate, nowSecs],
                networkReadTotal: metrics.networkRead,
                networkWriteTotal: metrics.networkWrite,
            }
        });
    }, INTERVAL_MSECS);

    ac.signal.addEventListener('abort', () => { // there is event.type and ac.signal.reason
        clearInterval(intervalId);
    }, { once: true });
}
2025-07-01 11:49:37 +02:00
2025-07-07 15:53:09 +02:00
// Returns an object-mode Readable that emits live metric objects for the host
// (options.system) and the requested app/service containers. Destroying the
// stream aborts all producers.
async function getStream(options) {
    assert.strictEqual(typeof options, 'object');

    const ac = new AbortController();

    const metricsStream = new Readable({
        objectMode: true,
        read(/*size*/) { /* ignored, we push via interval */ },
        destroy(error, callback) {
            ac.abort(error); // stops all the pipe* producers below
            callback(error);
        }
    });

    if (options.system) pipeSystemMetrics(metricsStream, ac); // deliberately not awaited; producer runs in the background

    for (const containerId of [ ...options.appIds, ...options.serviceIds ]) {
        pipeContainerMetrics(containerId, metricsStream, ac); // deliberately not awaited
    }

    return metricsStream;
}