Compare commits

..

19 Commits

Author SHA1 Message Date
Girish Ramakrishnan 20d6da8230 add debugs 2016-04-20 19:40:58 -07:00
Girish Ramakrishnan f159cacfbb Use same timestamp for archive and config
This fixes a very curious case:
1. App has backup.
2. App dies.
3. Box backs up. This make it reuse the backup. But it generates wrong config file timestamp.
4. Box cannot update anymore. This is because the backup of app fails - it tries to reuse
   the backup and that fails with AccessDenied because the timestamp above is wrong!
2016-04-20 19:37:00 -07:00
Girish Ramakrishnan 5e9ea98b66 ignore apps in errored state 2016-04-20 19:05:49 -07:00
Girish Ramakrishnan d87b7dcb75 fix typo 2016-04-20 12:56:35 -07:00
Girish Ramakrishnan 6eea2fef9a retry fetching icon
e2e randomly fails with EAI_AGAIN
2016-04-20 00:40:22 -07:00
Girish Ramakrishnan 34fd5f14a5 0.12.5 changes 2016-04-19 21:52:24 -07:00
Girish Ramakrishnan a4e73e747c fix crash mail subject 2016-04-19 19:12:47 -07:00
Girish Ramakrishnan eadff099eb send logs when apptask fails 2016-04-19 18:40:46 -07:00
Girish Ramakrishnan 15653cb3f8 rename to logcollector 2016-04-19 18:13:05 -07:00
Girish Ramakrishnan 2f8dc35c5d rename to sendFailureLogs 2016-04-19 18:06:11 -07:00
Girish Ramakrishnan a97720d204 rename to failure_notification 2016-04-19 18:04:45 -07:00
Girish Ramakrishnan 73898505b0 remove jslint header 2016-04-19 16:59:12 -07:00
Girish Ramakrishnan 88b4b6a38b use the crashnotifier module 2016-04-19 16:47:21 -07:00
Girish Ramakrishnan 3da82e3a63 rename to crashnotifierservice 2016-04-19 16:45:05 -07:00
Girish Ramakrishnan dad1585704 send crash notification on apptask crash 2016-04-19 16:43:58 -07:00
Girish Ramakrishnan e81dbdb36c add crashnotifier module 2016-04-19 16:42:05 -07:00
Girish Ramakrishnan ee2478e500 collect last 300 lines 2016-04-19 16:39:28 -07:00
Girish Ramakrishnan 0f7a6964a4 0.12.4 changes 2016-04-19 16:24:37 -07:00
Girish Ramakrishnan 5fa974ffe6 wait for 30 seconds in taskmanager instead
problem: because the apps are not inserted into appdb, the cloudron starts out
with an empty view. apps appear suddenly after 30 seconds.

besides, it makes more sense because 30 secs is not really tied to first run
2016-04-19 13:56:41 -07:00
13 changed files with 103 additions and 63 deletions
+6
View File
@@ -477,3 +477,9 @@
[0.12.3]
- LDAP: Do not set sn attribute when user has no surname
[0.12.4]
- Install app only after platform is ready
[0.12.5]
- Get alerts for app task failures
-42
View File
@@ -1,42 +0,0 @@
#!/usr/bin/env node
'use strict';
var assert = require('assert'),
mailer = require('./src/mailer.js'),
safe = require('safetydance'),
path = require('path'),
util = require('util');
var COLLECT_LOGS_CMD = path.join(__dirname, 'src/scripts/collectlogs.sh');
function collectLogs(program, callback) {
assert.strictEqual(typeof program, 'string');
assert.strictEqual(typeof callback, 'function');
var logs = safe.child_process.execSync('sudo ' + COLLECT_LOGS_CMD + ' ' + program, { encoding: 'utf8' });
callback(null, logs);
}
function sendCrashNotification(processName) {
collectLogs(processName, function (error, result) {
if (error) {
console.error('Failed to collect logs.', error);
result = util.format('Failed to collect logs.', error);
}
console.log('Sending crash notification email for', processName);
mailer.sendCrashNotification(processName, result);
});
}
function main() {
if (process.argv.length !== 3) return console.error('Usage: crashnotifier.js <processName>');
var processName = process.argv[2];
console.log('Started crash notifier for', processName);
sendCrashNotification(processName);
}
main();
+16
View File
@@ -0,0 +1,16 @@
#!/usr/bin/env node
'use strict';
var sendFailureLogs = require('./src/logcollector').sendFailureLogs;
function main() {
if (process.argv.length !== 3) return console.error('Usage: crashnotifier.js <processName>');
var processName = process.argv[2];
console.log('Started crash notifier for', processName);
sendFailureLogs(processName, { unit: processName });
}
main();
@@ -7,7 +7,7 @@ StopWhenUnneeded=false
[Service]
Type=idle
WorkingDirectory=/home/yellowtent/box
ExecStart="/home/yellowtent/box/crashnotifier.js" %I
ExecStart="/home/yellowtent/box/crashnotifierservice.js" %I
Environment="HOME=/home/yellowtent" "USER=yellowtent" "DEBUG=box*,connect-lastmile" "BOX_ENV=cloudron" "NODE_ENV=production"
KillMode=process
User=yellowtent
+1 -1
View File
@@ -162,7 +162,7 @@ function processDockerEvents() {
debug('OOM Context: %s', context);
// do not send mails for dev apps
if (error || app.appStoreId !== '') mailer.sendCrashNotification(program, context); // app can be null if it's an addon crash
if (error || app.appStoreId !== '') mailer.unexpectedExit(program, context); // app can be null if it's an addon crash
});
});
+11 -11
View File
@@ -1,7 +1,5 @@
#!/usr/bin/env node
/* jslint node:true */
'use strict';
exports = module.exports = {
@@ -232,17 +230,19 @@ function downloadIcon(app, callback) {
var iconUrl = config.apiServerOrigin() + '/api/v1/apps/' + app.appStoreId + '/versions/' + app.manifest.version + '/icon';
superagent
.get(iconUrl)
.buffer(true)
.end(function (error, res) {
if (error && !error.response) return callback(new Error('Network error downloading icon:' + error.message));
if (res.statusCode !== 200) return callback(null); // ignore error. this can also happen for apps installed with cloudron-cli
async.retry({ times: 10, interval: 5000 }, function (retryCallback) {
superagent
.get(iconUrl)
.buffer(true)
.end(function (error, res) {
if (error && !error.response) return retryCallback(new Error('Network error downloading icon:' + error.message));
if (res.statusCode !== 200) return retryCallback(null); // ignore error. this can also happen for apps installed with cloudron-cli
if (!safe.fs.writeFileSync(path.join(paths.APPICONS_DIR, app.id + '.png'), res.body)) return callback(new Error('Error saving icon:' + safe.error.message));
if (!safe.fs.writeFileSync(path.join(paths.APPICONS_DIR, app.id + '.png'), res.body)) return retryCallback(new Error('Error saving icon:' + safe.error.message));
callback(null);
});
retryCallback(null);
});
}, callback);
}
function registerSubdomain(app, callback) {
+7 -2
View File
@@ -189,18 +189,23 @@ function copyLastBackup(app, callback) {
assert.strictEqual(typeof app.lastBackupId, 'string');
assert.strictEqual(typeof callback, 'function');
var toFilenameArchive = util.format('appbackup_%s_%s-v%s.tar.gz', app.id, (new Date()).toISOString(), app.manifest.version);
var toFilenameConfig = util.format('appbackup_%s_%s-v%s.json', app.id, (new Date()).toISOString(), app.manifest.version);
var now = new Date();
var toFilenameArchive = util.format('appbackup_%s_%s-v%s.tar.gz', app.id, now.toISOString(), app.manifest.version);
var toFilenameConfig = util.format('appbackup_%s_%s-v%s.json', app.id, now.toISOString(), app.manifest.version);
settings.getBackupConfig(function (error, backupConfig) {
if (error) return callback(new BackupsError(BackupsError.INTERNAL_ERROR, error));
debug('copyLastBackup: copying archive %s to %s', app.lastBackupId, toFilenameArchive);
api(backupConfig.provider).copyObject(backupConfig, app.lastBackupId, toFilenameArchive, function (error) {
if (error) return callback(new BackupsError(BackupsError.EXTERNAL_ERROR, error));
// TODO change that logic by adjusting app.lastBackupId to not contain the file type
var configFileId = app.lastBackupId.slice(0, -'.tar.gz'.length) + '.json';
debug('copyLastBackup: copying config %s to %s', configFileId, toFilenameConfig);
api(backupConfig.provider).copyObject(backupConfig, configFileId, toFilenameConfig, function (error) {
if (error) return callback(new BackupsError(BackupsError.EXTERNAL_ERROR, error));
+1 -1
View File
@@ -102,7 +102,7 @@ function initialize(callback) {
assert.strictEqual(typeof callback, 'function');
exports.events.on(exports.EVENT_CONFIGURED, addDnsRecords);
exports.events.on(exports.EVENT_FIRST_RUN, function () { setTimeout(installAppBundle, 30000); }); // wait for 30sec for the addons to start up
exports.events.on(exports.EVENT_FIRST_RUN, installAppBundle);
// check activation state for existing cloudrons that do not have first run file
// can be removed once cloudrons have been updated
+37
View File
@@ -0,0 +1,37 @@
'use strict';
exports = module.exports = {
sendFailureLogs: sendFailureLogs
};
var assert = require('assert'),
mailer = require('./mailer.js'),
safe = require('safetydance'),
path = require('path'),
util = require('util');
var COLLECT_LOGS_CMD = path.join(__dirname, 'scripts/collectlogs.sh');
function collectLogs(unitName, callback) {
assert.strictEqual(typeof unitName, 'string');
assert.strictEqual(typeof callback, 'function');
var logs = safe.child_process.execSync('sudo ' + COLLECT_LOGS_CMD + ' ' + unitName, { encoding: 'utf8' });
callback(null, logs);
}
function sendFailureLogs(processName, options) {
assert.strictEqual(typeof processName, 'string');
assert.strictEqual(typeof options, 'object');
collectLogs(options.unit || processName, function (error, result) {
if (error) {
console.error('Failed to collect logs.', error);
result = util.format('Failed to collect logs.', error);
}
console.log('Sending failure logs for', processName);
mailer.unexpectedExit(processName, result);
});
}
@@ -2,7 +2,7 @@
Dear Cloudron Team,
Unfortunately <%= program %> on <%= fqdn %> crashed unexpectedly!
Unfortunately <%= program %> on <%= fqdn %> exited unexpectedly!
Please see some excerpt of the logs below.
+3 -3
View File
@@ -12,7 +12,7 @@ exports = module.exports = {
appUpdateAvailable: appUpdateAvailable,
sendInvite: sendInvite,
sendCrashNotification: sendCrashNotification,
unexpectedExit: unexpectedExit,
appDied: appDied,
@@ -395,7 +395,7 @@ function certificateRenewed(domain, message) {
// this function bypasses the queue intentionally. it is also expected to work without the mailer module initialized
// crashnotifier should be able to send mail when there is no db
function sendCrashNotification(program, context) {
function unexpectedExit(program, context) {
assert.strictEqual(typeof program, 'string');
assert.strictEqual(typeof context, 'string');
@@ -403,7 +403,7 @@ function sendCrashNotification(program, context) {
from: config.adminEmail(),
to: 'admin@cloudron.io',
subject: util.format('[%s] %s exited unexpectedly', config.fqdn(), program),
text: render('crash_notification.ejs', { fqdn: config.fqdn(), program: program, context: context, format: 'text' })
text: render('unexpected_exit.ejs', { fqdn: config.fqdn(), program: program, context: context, format: 'text' })
};
sendMails([ mailOptions ]);
+1 -1
View File
@@ -21,7 +21,7 @@ readonly program_name=$1
echo "${program_name}.log"
echo "-------------------"
journalctl --all --no-pager -u ${program_name} -n 100
journalctl --all --no-pager -u ${program_name} -n 300
echo
echo
echo "dmesg"
+18
View File
@@ -19,11 +19,13 @@ var appdb = require('./appdb.js'),
cloudron = require('./cloudron.js'),
debug = require('debug')('box:taskmanager'),
locker = require('./locker.js'),
sendFailureLogs = require('./logcollector.js').sendFailureLogs,
util = require('util'),
_ = require('underscore');
var gActiveTasks = { };
var gPendingTasks = [ ];
var gPlatformReady = false; // PaaS (addons) up and running
var TASK_CONCURRENCY = 5;
var NOOP_CALLBACK = function (error) { if (error) console.error(error); };
@@ -39,6 +41,11 @@ function initialize(callback) {
cloudron.events.on(cloudron.EVENT_CONFIGURED, resumeTasks);
}
setTimeout(function () {
gPlatformReady = true;
resumeTasks();
}, 30000); // wait 30 seconds to signal platform ready
callback();
}
@@ -82,6 +89,8 @@ function resumeTasks(callback) {
apps.forEach(function (app) {
if (app.installationState === appdb.ISTATE_INSTALLED && app.runState === appdb.RSTATE_RUNNING) return;
if (app.installationState === appdb.ISTATE_ERROR) return;
debug('Creating process for %s (%s) with state %s', app.location, app.id, app.installationState);
startAppTask(app.id, NOOP_CALLBACK);
});
@@ -106,6 +115,12 @@ function startAppTask(appId, callback) {
return callback(new Error(util.format('Task for %s is already active', appId)));
}
if (!gPlatformReady) {
debug('Platform not ready yet, queueing task for %s', appId);
gPendingTasks.push(appId);
return callback();
}
if (Object.keys(gActiveTasks).length >= TASK_CONCURRENCY) {
debug('Reached concurrency limit, queueing task for %s', appId);
gPendingTasks.push(appId);
@@ -130,7 +145,10 @@ function startAppTask(appId, callback) {
debug('Task for %s pid %s completed with status %s', appId, pid, code);
if (code === null /* signal */ || (code !== 0 && code !== 50)) { // apptask crashed
debug('Apptask crashed with code %s and signal %s', code, signal);
sendFailureLogs('apptask', { unit: 'box' });
appdb.update(appId, { installationState: appdb.ISTATE_ERROR, installationProgress: 'Apptask crashed with code ' + code + ' and signal ' + signal }, NOOP_CALLBACK);
} else if (code === 50) {
sendFailureLogs('apptask', { unit: 'box' });
}
delete gActiveTasks[appId];
locker.unlock(locker.OP_APPTASK); // unlock event will trigger next task