diff --git a/setup/start/collectd/collectd.conf b/setup/start/collectd/collectd.conf index b11151bca..695a22436 100644 --- a/setup/start/collectd/collectd.conf +++ b/setup/start/collectd/collectd.conf @@ -164,7 +164,9 @@ LoadPlugin swap #LoadPlugin vmem #LoadPlugin vserver #LoadPlugin wireless -LoadPlugin write_graphite + + FlushInterval 20 + #LoadPlugin write_http #LoadPlugin write_riemann diff --git a/setup/start/collectd/du.py b/setup/start/collectd/du.py index e0fa890ff..006a953eb 100644 --- a/setup/start/collectd/du.py +++ b/setup/start/collectd/du.py @@ -6,19 +6,26 @@ PATHS = [] # { name, dir, exclude } # there is a pattern in carbon/storage-schemas.conf which stores values every 12h for a year INTERVAL = 60 * 60 * 12 # twice a day. change values in docker-graphite if you change this +# we used to pass the INTERVAL as a parameter to register_read. however, collectd write_graphite +# takes a bit to load (tcp connection) and drops the du data. this then means that we have to wait +# for INTERVAL secs for du data. 
instead, we cache the value for INTERVAL secs +CACHE = dict() +CACHE_TIME = 0 + def du(pathinfo): # -B1 makes du print block sizes and not apparent sizes (to match df which also uses block sizes) - cmd = 'timeout 1800 du -DsB1 "{}"'.format(pathinfo['dir']) + dirname = pathinfo['dir'] + cmd = 'timeout 1800 du -DsB1 "{}"'.format(dirname) if pathinfo['exclude'] != '': cmd += ' --exclude "{}"'.format(pathinfo['exclude']) collectd.info('computing size with command: %s' % cmd); try: size = subprocess.check_output(cmd, shell=True).split()[0].decode('utf-8') - collectd.info('\tsize of %s is %s (time: %i)' % (pathinfo['dir'], size, int(time.time()))) + collectd.info('\tsize of %s is %s (time: %i)' % (dirname, size, int(time.time()))) return size except Exception as e: - collectd.info('\terror getting the size of %s: %s' % (pathinfo['dir'], str(e))) + collectd.info('\terror getting the size of %s: %s' % (dirname, str(e))) return 0 def parseSize(size): @@ -64,19 +71,35 @@ def init(): collectd.info('custom du plugin initialized with %s %s' % (PATHS, sys.version)) def read(): + global CACHE, CACHE_TIME + + # read from cache if it is younger than INTERVAL (12 hours) + read_cache = (time.time() - CACHE_TIME) < INTERVAL + + if not read_cache: + CACHE_TIME = time.time() + for pathinfo in PATHS: - size = du(pathinfo) + dirname = pathinfo['dir'] + if read_cache and dirname in CACHE: + size = CACHE[dirname] + else: + size = du(pathinfo) + CACHE[dirname] = size # type comes from https://github.com/collectd/collectd/blob/master/src/types.db val = collectd.Values(type='capacity', plugin='du', plugin_instance=pathinfo['name']) val.dispatch(values=[size], type_instance='usage') - size = dockerSize() + if read_cache and 'docker' in CACHE: + size = CACHE['docker'] + else: + size = dockerSize() + CACHE['docker'] = size + val = collectd.Values(type='capacity', plugin='du', plugin_instance='docker') val.dispatch(values=[size], type_instance='usage') - - collectd.register_init(init) 
collectd.register_config(configure) -collectd.register_read(read, INTERVAL) +collectd.register_read(read)