diff --git a/src/apphealthmonitor.js b/src/apphealthmonitor.js index 718d67455..cb7ab9c2f 100644 --- a/src/apphealthmonitor.js +++ b/src/apphealthmonitor.js @@ -6,8 +6,9 @@ var appdb = require('./appdb.js'), async = require('async'), DatabaseError = require('./databaseerror.js'), debug = require('debug')('box:apphealthmonitor'), - docker = require('./docker.js').connection, + docker = require('./docker.js'), eventlog = require('./eventlog.js'), + safe = require('safetydance'), superagent = require('superagent'), util = require('util'); @@ -77,11 +78,10 @@ function checkAppHealth(app, callback) { return callback(null); } - var container = docker.getContainer(app.containerId), - manifest = app.manifest; + const manifest = app.manifest; - container.inspect(function (err, data) { - if (err || !data || !data.State) { + docker.inspect(app.containerId, function (error, data) { + if (error || !data || !data.State) { debugApp(app, 'Error inspecting container'); return setHealth(app, appdb.HEALTH_ERROR, callback); } @@ -116,6 +116,18 @@ function checkAppHealth(app, callback) { }); } +function getContainerInfo(containerId, callback) { + docker.inspect(containerId, function (error, result) { + if (error) return callback(error); + + const appId = safe.query(result, 'Config.Labels.appId', null); + + if (!appId) return callback(null, null /* app */, { name: result.Name }); // addon + + apps.get(appId, callback); // don't get by container id as this can be an exec container + }); +} + /* OOM can be tested using stress tool like so: docker run -ti -m 100M cloudron/base:0.10.0 /bin/bash @@ -134,21 +146,20 @@ function processDockerEvents(intervalSecs, callback) { stream.setEncoding('utf8'); stream.on('data', function (data) { - var ev = JSON.parse(data); - var containerId = ev.id; - - appdb.getByContainerId(containerId, function (error, app) { // this can error for addons - var program = error || !app.id ? 
containerId : `app-${app.id}`; - var now = Date.now(); + const event = JSON.parse(data); + const containerId = String(event.id); + getContainerInfo(containerId, function (error, app, addon) { + const program = error ? containerId : (app ? app.fqdn : addon.name); + const now = Date.now(); const notifyUser = (!app || !app.debugMode) && (now - gLastOomMailTime > OOM_MAIL_LIMIT); - debug('OOM %s notifyUser: %s. lastOomTime: %s (now: %s)', program, notifyUser, gLastOomMailTime, now, ev); + debug('OOM %s notifyUser: %s. lastOomTime: %s (now: %s)', program, notifyUser, gLastOomMailTime, now); // do not send mails for dev apps if (notifyUser) { // app can be null for addon containers - eventlog.add(eventlog.ACTION_APP_OOM, AUDIT_SOURCE, { ev: ev, containerId: containerId, app: app || null }); + eventlog.add(eventlog.ACTION_APP_OOM, AUDIT_SOURCE, { event: event, containerId: containerId, addon: addon || null, app: app || null }); gLastOomMailTime = now; } diff --git a/src/docker.js b/src/docker.js index 6f32a3e6c..b81593d86 100644 --- a/src/docker.js +++ b/src/docker.js @@ -22,6 +22,7 @@ exports = module.exports = { getContainerIdByIp: getContainerIdByIp, inspect: inspect, inspectByName: inspect, + getEvents: getEvents, memoryUsage: memoryUsage, execContainer: execContainer, createVolume: createVolume, @@ -474,6 +475,19 @@ function inspect(containerId, callback) { }); } +function getEvents(options, callback) { + assert.strictEqual(typeof options, 'object'); + assert.strictEqual(typeof callback, 'function'); + + let docker = exports.connection; + + docker.getEvents(options, function (error, stream) { + if (error) return callback(new DockerError(DockerError.INTERNAL_ERROR, error)); + + callback(null, stream); + }); +} + function memoryUsage(containerId, callback) { assert.strictEqual(typeof containerId, 'string'); assert.strictEqual(typeof callback, 'function'); diff --git a/src/mail_templates/oom_event.ejs b/src/mail_templates/oom_event.ejs index 08cfd2861..7659e3488 100644 
--- a/src/mail_templates/oom_event.ejs +++ b/src/mail_templates/oom_event.ejs @@ -2,22 +2,21 @@ Dear <%= cloudronName %> Admin, -<%= program %> exited unexpectedly using too much memory! +<%= program %> has been restarted now as it ran out of memory. -The app has been restarted now. Should this message appear repeatedly or -undefined behavior is observed, give the app more memory. -This can be done in the advanced settings in the app configuration dialog -in your Cloudron's web interface. +Should this message appear repeatedly or undefined behavior is observed, give the app more memory. -Please see some excerpt of the logs below. +* To increase an app's memory limit - https://cloudron.io/documentation/apps/#increasing-the-memory-limit-of-an-app +* To increase a service's memory limit - https://cloudron.io/documentation/troubleshooting/#services + +Out of memory event: ------------------------------------- -<%- context %> +<%- event %> ------------------------------------- - Powered by https://cloudron.io Sent at: <%= new Date().toUTCString() %> diff --git a/src/mailer.js b/src/mailer.js index 7ad92ebd4..4b4c88f69 100644 --- a/src/mailer.js +++ b/src/mailer.js @@ -481,10 +481,10 @@ function certificateRenewalError(domain, message) { }); } -function oomEvent(mailTo, program, context) { +function oomEvent(mailTo, program, event) { assert.strictEqual(typeof mailTo, 'string'); assert.strictEqual(typeof program, 'string'); - assert.strictEqual(typeof context, 'string'); + assert.strictEqual(typeof event, 'object'); getMailConfig(function (error, mailConfig) { if (error) return debug('Error getting mail details:', error); @@ -492,8 +492,8 @@ function oomEvent(mailTo, program, context) { var mailOptions = { from: mailConfig.notificationFrom, to: mailTo, - subject: util.format('[%s] %s exited unexpectedly', mailConfig.cloudronName, program), - text: render('oom_event.ejs', { cloudronName: mailConfig.cloudronName, program: program, context: context, format: 'text' }) + 
subject: util.format('[%s] %s was restarted (OOM)', mailConfig.cloudronName, program), + text: render('oom_event.ejs', { cloudronName: mailConfig.cloudronName, program: program, event: event, format: 'text' }) }; sendMails([ mailOptions ]); diff --git a/src/notifications.js b/src/notifications.js index bb921112c..36e29479c 100644 --- a/src/notifications.js +++ b/src/notifications.js @@ -180,23 +180,35 @@ function adminChanged(performedBy, eventId, user, callback) { }, callback); } -function oomEvent(eventId, program, context, callback) { +function oomEvent(eventId, app, addon, containerId, event, callback) { assert.strictEqual(typeof eventId, 'string'); - assert.strictEqual(typeof program, 'string'); - assert.strictEqual(typeof context, 'object'); + assert.strictEqual(typeof app, 'object'); + assert.strictEqual(typeof addon, 'object'); + assert.strictEqual(typeof containerId, 'string'); assert.strictEqual(typeof callback, 'function'); + let title, message, program; + if (app) { + program = app.fqdn; + title = `The application ${app.fqdn} (${app.manifest.title}) ran out of memory.`; + message = 'The application has been restarted automatically. If you see this notification often, consider increasing the [memory limit](https://cloudron.io/documentation/apps/#increasing-the-memory-limit-of-an-app)'; + } else if (addon) { + program = addon.name; + title = `The ${addon.name} service ran out of memory`; + message = 'The service has been restarted automatically. If you see this notification often, consider increasing the [memory limit](https://cloudron.io/documentation/troubleshooting/#services)'; + } else { + program = containerId; + title = `The container ${containerId} ran out of memory`; + message = 'The container has been restarted automatically. 
Consider increasing the [memory limit](https://docs.docker.com/v17.09/edge/engine/reference/commandline/update/#update-a-containers-kernel-memory-constraints)'; + } + // also send us a notification mail - if (config.provider() === 'caas') mailer.oomEvent('support@cloudron.io', program, JSON.stringify(context, null, 4)); + if (config.provider() === 'caas') mailer.oomEvent('support@cloudron.io', program, event); actionForAllAdmins([], function (admin, done) { - mailer.oomEvent(admin.email, program, JSON.stringify(context, null, 4)); + mailer.oomEvent(admin.email, program, event); - var message; - if (context.app) message = `The application ${context.app.manifest.title} with id ${context.app.id} ran out of memory.`; - else message = `The container with id ${context.details.id} ran out of memory`; - - add(admin.id, eventId, 'Process died out-of-memory', message, done); + add(admin.id, eventId, title, message, done); }, callback); } @@ -351,7 +363,7 @@ function onEvent(id, action, source, data, callback) { case eventlog.ACTION_USER_ADD: return userAdded(source.userId, id, data.user, callback); case eventlog.ACTION_USER_REMOVE: return userRemoved(source.userId, id, data.user, callback); case eventlog.ACTION_USER_UPDATE: return data.adminStatusChanged ? adminChanged(source.userId, id, data.user, callback) : callback(); - case eventlog.ACTION_APP_OOM: return oomEvent(id, data.app ? data.app.id : data.containerId, { app: data.app, details: data }, callback); + case eventlog.ACTION_APP_OOM: return oomEvent(id, data.app, data.addon, data.containerId, data.event, callback); case eventlog.ACTION_APP_DOWN: return appDied(id, data.app, callback); case eventlog.ACTION_APP_UP: return appUp(id, data.app, callback); case eventlog.ACTION_APP_TASK_CRASH: return apptaskCrash(id, data.appId, data.crashLogFile, callback);