Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0aaf3b418f | |||
| 334472bf25 | |||
| e9fb5d7e60 | |||
| 43c3a4f781 | |||
| 6cc07cd005 |
@@ -2238,3 +2238,11 @@
|
||||
* sftp: fix rebuild condition
|
||||
* Fix display of user management/dashboard visiblity for email apps
|
||||
* graphite: disable tagdb and reduce log noise
|
||||
|
||||
[6.2.6]
|
||||
* Fix issue where collectd is restarted too quickly before graphite
|
||||
|
||||
[6.2.7]
|
||||
* collectd: restart on graphite upgrade
|
||||
|
||||
*
|
||||
|
||||
@@ -164,7 +164,9 @@ LoadPlugin swap
|
||||
#LoadPlugin vmem
|
||||
#LoadPlugin vserver
|
||||
#LoadPlugin wireless
|
||||
LoadPlugin write_graphite
|
||||
<LoadPlugin write_graphite>
|
||||
FlushInterval 20
|
||||
</LoadPlugin>
|
||||
#LoadPlugin write_http
|
||||
#LoadPlugin write_riemann
|
||||
|
||||
|
||||
@@ -6,19 +6,26 @@ PATHS = [] # { name, dir, exclude }
|
||||
# there is a pattern in carbon/storage-schemas.conf which stores values every 12h for a year
|
||||
INTERVAL = 60 * 60 * 12 # twice a day. change values in docker-graphite if you change this
|
||||
|
||||
# we used to pass the INTERVAL as a parameter to register_read. however, collectd write_graphite
|
||||
# takes a bit to load (tcp connection) and drops the du data. this then means that we have to wait
|
||||
# for INTERVAL secs for du data. instead, we just cache the value for INTERVAL instead
|
||||
CACHE = dict()
|
||||
CACHE_TIME = 0
|
||||
|
||||
def du(pathinfo):
|
||||
# -B1 makes du print block sizes and not apparent sizes (to match df which also uses block sizes)
|
||||
cmd = 'timeout 1800 du -DsB1 "{}"'.format(pathinfo['dir'])
|
||||
dirname = pathinfo['dir']
|
||||
cmd = 'timeout 1800 du -DsB1 "{}"'.format(dirname)
|
||||
if pathinfo['exclude'] != '':
|
||||
cmd += ' --exclude "{}"'.format(pathinfo['exclude'])
|
||||
|
||||
collectd.info('computing size with command: %s' % cmd);
|
||||
try:
|
||||
size = subprocess.check_output(cmd, shell=True).split()[0].decode('utf-8')
|
||||
collectd.info('\tsize of %s is %s (time: %i)' % (pathinfo['dir'], size, int(time.time())))
|
||||
collectd.info('\tsize of %s is %s (time: %i)' % (dirname, size, int(time.time())))
|
||||
return size
|
||||
except Exception as e:
|
||||
collectd.info('\terror getting the size of %s: %s' % (pathinfo['dir'], str(e)))
|
||||
collectd.info('\terror getting the size of %s: %s' % (dirname, str(e)))
|
||||
return 0
|
||||
|
||||
def parseSize(size):
|
||||
@@ -64,19 +71,35 @@ def init():
|
||||
collectd.info('custom du plugin initialized with %s %s' % (PATHS, sys.version))
|
||||
|
||||
def read():
|
||||
global CACHE, CACHE_TIME
|
||||
|
||||
# read from cache if < 12 hours
|
||||
read_cache = (time.time() - CACHE_TIME) < INTERVAL
|
||||
|
||||
if not read_cache:
|
||||
CACHE_TIME = time.time()
|
||||
|
||||
for pathinfo in PATHS:
|
||||
size = du(pathinfo)
|
||||
dirname = pathinfo['dir']
|
||||
if read_cache and dirname in CACHE:
|
||||
size = CACHE[dirname]
|
||||
else:
|
||||
size = du(pathinfo)
|
||||
CACHE[dirname] = size
|
||||
|
||||
# type comes from https://github.com/collectd/collectd/blob/master/src/types.db
|
||||
val = collectd.Values(type='capacity', plugin='du', plugin_instance=pathinfo['name'])
|
||||
val.dispatch(values=[size], type_instance='usage')
|
||||
|
||||
size = dockerSize()
|
||||
if read_cache and 'docker' in CACHE:
|
||||
size = CACHE['docker']
|
||||
else:
|
||||
size = dockerSize()
|
||||
CACHE['docker'] = size
|
||||
|
||||
val = collectd.Values(type='capacity', plugin='du', plugin_instance='docker')
|
||||
val.dispatch(values=[size], type_instance='usage')
|
||||
|
||||
|
||||
|
||||
collectd.register_init(init)
|
||||
collectd.register_config(configure)
|
||||
collectd.register_read(read, INTERVAL)
|
||||
collectd.register_read(read)
|
||||
|
||||
+11
-5
@@ -1821,7 +1821,12 @@ function startGraphite(existingInfra, serviceConfig, callback) {
|
||||
shell.sudo('removeGraphiteDir', [ RMADDONDIR_CMD, 'graphite' ], {}, done);
|
||||
},
|
||||
shell.exec.bind(null, 'startGraphite', cmd)
|
||||
], callback);
|
||||
], function (error) {
|
||||
// restart collectd to get the disk stats after graphite starts. currently, there is no way to do graphite health check
|
||||
if (!error) setTimeout(() => shell.sudo('restartcollectd', [ RESTART_SERVICE_CMD, 'collectd' ], {}, NOOP_CALLBACK), 60000);
|
||||
|
||||
callback(error);
|
||||
});
|
||||
}
|
||||
|
||||
function setupProxyAuth(app, options, callback) {
|
||||
@@ -1863,6 +1868,10 @@ function startRedis(existingInfra, callback) {
|
||||
const redisName = 'redis-' + app.id;
|
||||
|
||||
async.series([
|
||||
(done) => {
|
||||
if (!upgrading) return done();
|
||||
backupRedis(app, {}, done);
|
||||
},
|
||||
shell.exec.bind(null, 'stopRedis', `docker stop ${redisName} || true`), // redis will backup as part of signal handling
|
||||
shell.exec.bind(null, 'removeRedis', `docker rm -f ${redisName} || true`),
|
||||
setupRedis.bind(null, app, app.manifest.addons.redis) // starts the container
|
||||
@@ -2152,10 +2161,7 @@ function restartGraphite(callback) {
|
||||
|
||||
docker.restartContainer('graphite', callback);
|
||||
|
||||
setTimeout(function () {
|
||||
// wait for graphite to startup and then restart collectd. graphite migrations can be quite slow!
|
||||
shell.sudo('restartcollectd', [ RESTART_SERVICE_CMD, 'collectd' ], {}, NOOP_CALLBACK);
|
||||
}, 60000);
|
||||
setTimeout(() => shell.sudo('restartcollectd', [ RESTART_SERVICE_CMD, 'collectd' ], {}, NOOP_CALLBACK), 60000);
|
||||
}
|
||||
|
||||
function teardownOauth(app, options, callback) {
|
||||
|
||||
Reference in New Issue
Block a user