Files
cloudron-box/setup/start/collectd/du.py
Girish Ramakrishnan c1ee3dcbd4 collectd: cache du values and send it every Interval (20)
collectd plugin ordering matters. the write_graphite plugin establishes
a TCP connection but there is a race between that and the df/du values that
get reported. du is especially problematic since we report this only every 12 hours.

so, instead we cache the values and report it every 20 seconds. on the carbon side,
it will just retain every 12 hours (since that is the whisper retention period).

there is also FlushInterval which I am not 100% sure has any effect. by default, the
write_graphite plugin waits for 1428 bytes to be accumulated. (https://manpages.debian.org/unstable/collectd-core/collectd.conf.5.en.html)

https://github.com/collectd/collectd/issues/2672
https://github.com/collectd/collectd/pull/1044

I found this syntax hidden deep inside https://www.cisco.com/c/en/us/td/docs/net_mgmt/virtual_topology_system/2_6_3/user_guide/Cisco_VTS_2_6_3_User_Guide/Cisco_VTS_2_6_1_User_Guide_chapter_01111.pdf
2021-03-26 00:21:38 -07:00

106 lines
4.0 KiB
Python

import collectd,os,subprocess,sys,re,time
# https://www.programcreek.com/python/example/106897/collectd.register_read
PATHS = [] # { name, dir, exclude }
# there is a pattern in carbon/storage-schemas.conf which stores values every 12h for a year
INTERVAL = 60 * 60 * 12 # twice a day. change values in docker-graphite if you change this
# we used to pass the INTERVAL as a parameter to register_read. however, collectd write_graphite
# takes a bit to load (tcp connection) and drops the du data. this then means that we have to wait
# for INTERVAL secs for du data. instead, we just cache the value for INTERVAL instead
CACHE = dict()
CACHE_TIME = 0
def du(pathinfo):
# -B1 makes du print block sizes and not apparent sizes (to match df which also uses block sizes)
dirname = pathinfo['dir']
cmd = 'timeout 1800 du -DsB1 "{}"'.format(dirname)
if pathinfo['exclude'] != '':
cmd += ' --exclude "{}"'.format(pathinfo['exclude'])
collectd.info('computing size with command: %s' % cmd);
try:
size = subprocess.check_output(cmd, shell=True).split()[0].decode('utf-8')
collectd.info('\tsize of %s is %s (time: %i)' % (dirname, size, int(time.time())))
return size
except Exception as e:
collectd.info('\terror getting the size of %s: %s' % (dirname, str(e)))
return 0
def parseSize(size):
units = {"B": 1, "KB": 10**3, "MB": 10**6, "GB": 10**9, "TB": 10**12}
number, unit, _ = re.split('([a-zA-Z]+)', size.upper())
return int(float(number)*units[unit])
def dockerSize():
# use --format '{{json .}}' to dump the string. '{{if eq .Type "Images"}}{{.Size}}{{end}}' still creates newlines
# https://godoc.org/github.com/docker/go-units#HumanSize is used. so it's 1000 (KB) and not 1024 (KiB)
cmd = 'timeout 1800 docker system df --format "{{.Size}}" | head -n1'
try:
size = subprocess.check_output(cmd, shell=True).strip().decode('utf-8')
collectd.info('size of docker images is %s (%s) (time: %i)' % (size, parseSize(size), int(time.time())))
return parseSize(size)
except Exception as e:
collectd.info('error getting docker images size : %s' % str(e))
return 0
# configure is called for each module block. this is called before init
def configure(config):
global PATHS
for child in config.children:
if child.key != 'Path':
collectd.info('du plugin: Unknown config key "%s"' % key)
continue
pathinfo = { 'name': '', 'dir': '', 'exclude': '' }
for node in child.children:
if node.key == 'Instance':
pathinfo['name'] = node.values[0]
elif node.key == 'Dir':
pathinfo['dir'] = node.values[0]
elif node.key == 'Exclude':
pathinfo['exclude'] = node.values[0]
PATHS.append(pathinfo);
collectd.info('du plugin: monitoring %s' % pathinfo['dir']);
def init():
global PATHS
collectd.info('custom du plugin initialized with %s %s' % (PATHS, sys.version))
def read():
global CACHE, CACHE_TIME
# read from cache if < 12 hours
read_cache = (time.time() - CACHE_TIME) < INTERVAL
if not read_cache:
CACHE_TIME = time.time()
for pathinfo in PATHS:
dirname = pathinfo['dir']
if read_cache and dirname in CACHE:
size = CACHE[dirname]
else:
size = du(pathinfo)
CACHE[dirname] = size
# type comes from https://github.com/collectd/collectd/blob/master/src/types.db
val = collectd.Values(type='capacity', plugin='du', plugin_instance=pathinfo['name'])
val.dispatch(values=[size], type_instance='usage')
if read_cache and 'docker' in CACHE:
size = CACHE['docker']
else:
size = dockerSize()
CACHE['docker'] = size
val = collectd.Values(type='capacity', plugin='du', plugin_instance='docker')
val.dispatch(values=[size], type_instance='usage')
collectd.register_init(init)
collectd.register_config(configure)
collectd.register_read(read)