Merge apphealthtask into box server

We used to run this as a separate process but no amount of node/v8 tweaking
makes them run as standalone with 50M RSS.

Three solutions were considered for the memory issue:
1. Use systemd timer. apphealthtask needs to run quiet frequently (10 sec)
   for the ui to get the app health update immediately after install.

2. Merge into box server (this commit)

3. Increase memory to 80M. This seems to make apphealthtask run as-is.
This commit is contained in:
Girish Ramakrishnan
2015-09-14 10:52:11 -07:00
parent f028649582
commit 1cd9d07d8c
4 changed files with 25 additions and 60 deletions
+133
View File
@@ -0,0 +1,133 @@
#!/usr/bin/env node
'use strict';
require('supererror')({ splatchError: true });
var appdb = require('./appdb.js'),
assert = require('assert'),
async = require('async'),
DatabaseError = require('./databaseerror.js'),
debug = require('debug')('box:apphealthmonitor'),
docker = require('./docker.js'),
mailer = require('./mailer.js'),
superagent = require('superagent'),
util = require('util');
exports = module.exports = {
start: start
};
var HEALTHCHECK_INTERVAL = 10 * 1000; // every 10 seconds. this needs to be small since the UI makes only healthy apps clickable
var UNHEALTHY_THRESHOLD = 3 * 60 * 1000; // 3 minutes
var gHealthInfo = { }; // { time, emailSent }
function debugApp(app) {
assert(!app || typeof app === 'object');
var prefix = app ? app.location : '(no app)';
debug(prefix + ' ' + util.format.apply(util, Array.prototype.slice.call(arguments, 1)));
}
function setHealth(app, health, callback) {
assert.strictEqual(typeof app, 'object');
assert.strictEqual(typeof health, 'string');
assert.strictEqual(typeof callback, 'function');
var now = new Date();
if (!(app.id in gHealthInfo)) { // add new apps to list
gHealthInfo[app.id] = { time: now, emailSent: false };
}
if (health === appdb.HEALTH_HEALTHY) {
gHealthInfo[app.id].time = now;
} else if (Math.abs(now - gHealthInfo[app.id].time) > UNHEALTHY_THRESHOLD) {
if (gHealthInfo[app.id].emailSent) return callback(null);
debugApp(app, 'marking as unhealthy since not seen for more than %s minutes', UNHEALTHY_THRESHOLD/(60 * 1000));
mailer.appDied(app);
gHealthInfo[app.id].emailSent = true;
} else {
debugApp(app, 'waiting for sometime to update the app health');
return callback(null);
}
appdb.setHealth(app.id, health, function (error) {
if (error && error.reason === DatabaseError.NOT_FOUND) return callback(null); // app uninstalled?
if (error) return callback(error);
app.health = health;
callback(null);
});
}
// callback is called with error for fatal errors and not if health check failed
function checkAppHealth(app, callback) {
if (app.installationState !== appdb.ISTATE_INSTALLED || app.runState !== appdb.RSTATE_RUNNING) {
debugApp(app, 'skipped. istate:%s rstate:%s', app.installationState, app.runState);
return callback(null);
}
var container = docker.getContainer(app.containerId),
manifest = app.manifest;
container.inspect(function (err, data) {
if (err || !data || !data.State) {
debugApp(app, 'Error inspecting container');
return setHealth(app, appdb.HEALTH_ERROR, callback);
}
if (data.State.Running !== true) {
debugApp(app, 'exited');
return setHealth(app, appdb.HEALTH_DEAD, callback);
}
// poll through docker network instead of nginx to bypass any potential oauth proxy
var healthCheckUrl = 'http://127.0.0.1:' + app.httpPort + manifest.healthCheckPath;
superagent
.get(healthCheckUrl)
.redirects(0)
.timeout(HEALTHCHECK_INTERVAL)
.end(function (error, res) {
if (error || res.status >= 400) { // 2xx and 3xx are ok
debugApp(app, 'not alive : %s', error || res.status);
setHealth(app, appdb.HEALTH_UNHEALTHY, callback);
} else {
debugApp(app, 'alive');
setHealth(app, appdb.HEALTH_HEALTHY, callback);
}
});
});
}
function processApps(callback) {
appdb.getAll(function (error, apps) {
if (error) return callback(error);
async.each(apps, checkAppHealth, function (error) {
if (error) console.error(error);
callback(null);
});
});
}
function run() {
processApps(function (error) {
if (error) console.error(error);
setTimeout(run, HEALTHCHECK_INTERVAL);
});
}
function start(callback) {
assert.strictEqual(typeof callback, 'function');
debug('Starting apphealthmonitor');
run();
callback();
}