Compare commits

...

15 Commits

Author SHA1 Message Date
Girish Ramakrishnan 26aefadfba systemd: fix crashnotifier 2015-09-07 21:40:01 -07:00
Girish Ramakrishnan 51a28842cf systemd: pass the instance name as argument 2015-09-07 21:16:22 -07:00
Girish Ramakrishnan 210c2f3cc1 Output some logs in crashnotifier 2015-09-07 21:10:00 -07:00
Girish Ramakrishnan 773c326eb7 systemd: just wait for 5 seconds for box to die 2015-09-07 20:58:14 -07:00
Girish Ramakrishnan cb2fb026c5 systemd: do not restart crashnotifier 2015-09-07 20:54:58 -07:00
Girish Ramakrishnan a4731ad054 200m is a more sane memory limit 2015-09-07 20:48:29 -07:00
Girish Ramakrishnan aa33938fb5 systemd: fix config files 2015-09-07 20:46:32 -07:00
Girish Ramakrishnan edfe8f1ad0 disable pager when collecting logs 2015-09-07 20:27:27 -07:00
Girish Ramakrishnan 41399a2593 Make crashnotifier.js executable 2015-09-07 20:15:13 -07:00
Girish Ramakrishnan 2a4c467ab8 systemd: Fix crashnotifier 2015-09-07 20:14:37 -07:00
Girish Ramakrishnan 6be6092c0e Add memory limits on services 2015-09-07 19:16:34 -07:00
Girish Ramakrishnan e76584b0da Move from supervisor to systemd
This removes logrotate as well since we use systemd logging
2015-09-07 14:31:25 -07:00
Girish Ramakrishnan b3816615db run upto 5 apptasks in parallel
fixes #482
2015-09-05 09:17:46 -07:00
Johannes Zellner 212d0bd55a Revert "Add hack for broken app backup tarballs"
This reverts commit 9723951bfc.
2015-08-31 21:44:24 -07:00
Girish Ramakrishnan 712ada940e Add hack for broken app backup tarballs 2015-08-31 18:58:38 -07:00
27 changed files with 155 additions and 196 deletions
+1 -5
View File
@@ -4,10 +4,6 @@ docs/
webadmin/dist/
setup/splash/website/
# vim swam files
# vim swap files
*.swp
# supervisor
supervisord.pid
supervisord.log
-1
View File
@@ -4,7 +4,6 @@ The Box
Development setup
-----------------
* sudo useradd -m yellowtent
** This dummy user is required for supervisor 'box' configs
** Add admin-localhost as 127.0.0.1 in /etc/hosts
** All apps will be installed as hypened-subdomains of localhost. You should add
hyphened-subdomains of your apps into /etc/hosts
Regular → Executable
+20 -26
View File
@@ -2,20 +2,12 @@
'use strict';
// WARNING This is a supervisor eventlistener!
// The communication happens via stdin/stdout
// !! No console.log() allowed
// !! Do not set DEBUG
var assert = require('assert'),
mailer = require('./src/mailer.js'),
safe = require('safetydance'),
supervisor = require('supervisord-eventlistener'),
path = require('path'),
util = require('util');
var gLastNotifyTime = {};
var gCooldownTime = 1000 * 60 * 5; // 5 min
var COLLECT_LOGS_CMD = path.join(__dirname, 'src/scripts/collectlogs.sh');
function collectLogs(program, callback) {
@@ -26,28 +18,30 @@ function collectLogs(program, callback) {
callback(null, logs);
}
supervisor.on('PROCESS_STATE_EXITED', function (headers, data) {
if (data.expected === '1') return console.error('Normal app %s exit', data.processname);
console.error('%s exited unexpectedly', data.processname);
collectLogs(data.processname, function (error, result) {
function sendCrashNotification(processName) {
collectLogs(processName, function (error, result) {
if (error) {
console.error('Failed to collect logs.', error);
result = util.format('Failed to collect logs.', error);
}
if (!gLastNotifyTime[data.processname] || gLastNotifyTime[data.processname] < Date.now() - gCooldownTime) {
console.error('Send mail.');
mailer.sendCrashNotification(data.processname, result);
gLastNotifyTime[data.processname] = Date.now();
} else {
console.error('Do not send mail, already sent one recently.');
}
console.log('Sending crash notification email for', processName);
mailer.sendCrashNotification(processName, result);
});
});
}
function main() {
if (process.argv.length !== 3) return console.error('Usage: crashnotifier.js <processName>');
var processName = process.argv[2];
console.log('Started crash notifier for', processName);
mailer.initialize(function (error) {
if (error) return console.error(error);
sendCrashNotification(processName);
});
}
main();
mailer.initialize(function () {
supervisor.listen(process.stdin, process.stdout);
console.error('Crashnotifier listening...');
});
+7 -13
View File
@@ -9,22 +9,22 @@
},
"aws-sdk": {
"version": "2.1.46",
"from": "aws-sdk@*",
"from": "https://registry.npmjs.org/aws-sdk/-/aws-sdk-2.1.46.tgz",
"resolved": "https://registry.npmjs.org/aws-sdk/-/aws-sdk-2.1.46.tgz",
"dependencies": {
"sax": {
"version": "0.5.3",
"from": "sax@0.5.3",
"from": "http://registry.npmjs.org/sax/-/sax-0.5.3.tgz",
"resolved": "http://registry.npmjs.org/sax/-/sax-0.5.3.tgz"
},
"xml2js": {
"version": "0.2.8",
"from": "xml2js@0.2.8",
"from": "https://registry.npmjs.org/xml2js/-/xml2js-0.2.8.tgz",
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.2.8.tgz"
},
"xmlbuilder": {
"version": "0.4.2",
"from": "xmlbuilder@0.4.2",
"from": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-0.4.2.tgz",
"resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-0.4.2.tgz"
}
}
@@ -156,16 +156,15 @@
"connect-lastmile": {
"version": "0.0.13",
"from": "connect-lastmile@0.0.13",
"resolved": "https://registry.npmjs.org/connect-lastmile/-/connect-lastmile-0.0.13.tgz",
"dependencies": {
"debug": {
"version": "2.1.3",
"from": "https://registry.npmjs.org/debug/-/debug-2.1.3.tgz",
"from": "debug@>=2.1.0 <2.2.0",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.1.3.tgz",
"dependencies": {
"ms": {
"version": "0.7.0",
"from": "http://registry.npmjs.org/ms/-/ms-0.7.0.tgz",
"from": "ms@0.7.0",
"resolved": "http://registry.npmjs.org/ms/-/ms-0.7.0.tgz"
}
}
@@ -2268,7 +2267,7 @@
},
"safetydance": {
"version": "0.0.19",
"from": "https://registry.npmjs.org/safetydance/-/safetydance-0.0.19.tgz",
"from": "safetydance@0.0.19",
"resolved": "https://registry.npmjs.org/safetydance/-/safetydance-0.0.19.tgz"
},
"semver": {
@@ -2442,11 +2441,6 @@
}
}
},
"supervisord-eventlistener": {
"version": "0.1.0",
"from": "https://registry.npmjs.org/supervisord-eventlistener/-/supervisord-eventlistener-0.1.0.tgz",
"resolved": "https://registry.npmjs.org/supervisord-eventlistener/-/supervisord-eventlistener-0.1.0.tgz"
},
"tail-stream": {
"version": "0.2.1",
"from": "https://registry.npmjs.org/tail-stream/-/tail-stream-0.2.1.tgz",
-1
View File
@@ -61,7 +61,6 @@
"split": "^1.0.0",
"superagent": "~0.21.0",
"supererror": "^0.7.0",
"supervisord-eventlistener": "^0.1.0",
"tail-stream": "https://registry.npmjs.org/tail-stream/-/tail-stream-0.2.1.tgz",
"underscore": "^1.7.0",
"valid-url": "^1.0.9",
+2 -2
View File
@@ -16,7 +16,7 @@ and replace it with a new one for an update.
Because we do not package things as Docker yet, we should be careful
about the code here. We have to expect remains of an older setup code.
For example, older supervisor or nginx configs might be around.
For example, older systemd or nginx configs might be around.
The config directory is _part_ of the container and is not a VOLUME.
Which is to say that the files will be nuked from one update to the next.
@@ -40,7 +40,7 @@ version (see below) or the mysql/postgresql data etc.
* It then setups up the cloud infra (setup_infra.sh) and creates cloudron.conf.
* supervisor is then started
* box services are then started
setup_infra.sh
This setups containers like graphite, mail and the addons containers.
+4 -7
View File
@@ -13,13 +13,10 @@ readonly DATA_DIR="/home/yellowtent/data"
rm -rf "${CONFIG_DIR}"
sudo -u yellowtent mkdir "${CONFIG_DIR}"
########## logrotate (default ubuntu runs this daily)
rm -rf /etc/logrotate.d/*
cp -r "${container_files}/logrotate/." /etc/logrotate.d/
########## supervisor
rm -rf /etc/supervisor/*
cp -r "${container_files}/supervisor/." /etc/supervisor/
########## systemd
cp -r "${container_files}/systemd/." /etc/systemd/system/
systemctl daemon-reload
systemctl enable cloudron.target
########## sudoers
rm /etc/sudoers.d/*
-6
View File
@@ -1,6 +0,0 @@
/var/log/cloudron/*log {
missingok
notifempty
size 100k
nocompress
}
-7
View File
@@ -1,7 +0,0 @@
/var/log/supervisor/*log {
missingok
copytruncate
notifempty
size 100k
nocompress
}
@@ -1,10 +0,0 @@
[program:apphealthtask]
command=/usr/bin/node "/home/yellowtent/box/apphealthtask.js"
autostart=true
autorestart=true
redirect_stderr=true
stdout_logfile=/var/log/supervisor/apphealthtask.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=2
user=yellowtent
environment=HOME="/home/yellowtent",USER="yellowtent",DEBUG="box*",BOX_ENV="cloudron",NODE_ENV="production"
@@ -1,10 +0,0 @@
[program:box]
command=/usr/bin/node "/home/yellowtent/box/app.js"
autostart=true
autorestart=true
redirect_stderr=true
stdout_logfile=/var/log/supervisor/box.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=2
user=yellowtent
environment=HOME="/home/yellowtent",USER="yellowtent",DEBUG="box*,connect-lastmile",BOX_ENV="cloudron",NODE_ENV="production"
@@ -1,11 +0,0 @@
[eventlistener:crashnotifier]
command=/usr/bin/node "/home/yellowtent/box/crashnotifier.js"
events=PROCESS_STATE
autostart=true
autorestart=true
redirect_stderr=false
stderr_logfile=/var/log/supervisor/crashnotifier.log
stderr_logfile_maxbytes=50MB
stderr_logfile_backups=2
user=yellowtent
environment=HOME="/home/yellowtent",USER="yellowtent",BOX_ENV="cloudron",NODE_ENV="production"
@@ -1,10 +0,0 @@
[program:janitor]
command=/usr/bin/node "/home/yellowtent/box/janitor.js"
autostart=true
autorestart=true
redirect_stderr=true
stdout_logfile=/var/log/supervisor/janitor.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=2
user=yellowtent
environment=HOME="/home/yellowtent",USER="yellowtent",DEBUG="box*",BOX_ENV="cloudron",NODE_ENV="production"
@@ -1,10 +0,0 @@
[program:oauthproxy]
command=/usr/bin/node "/home/yellowtent/box/oauthproxy.js"
autostart=true
autorestart=true
redirect_stderr=true
stdout_logfile=/var/log/supervisor/oauthproxy.log
stdout_logfile_maxbytes=50MB
stdout_logfile_backups=2
user=yellowtent
environment=HOME="/home/yellowtent",USER="yellowtent",DEBUG="box*",BOX_ENV="cloudron",NODE_ENV="production"
@@ -1,33 +0,0 @@
; supervisor config file
; http://coffeeonthekeyboard.com/using-supervisorctl-with-linux-permissions-but-without-root-or-sudo-977/
[inet_http_server]
port = 127.0.0.1:9001
[supervisord]
logfile=/var/log/supervisor/supervisord.log ; (main log file;default $CWD/supervisord.log)
pidfile=/var/run/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
logfile_maxbytes = 50MB
logfile_backups=10
loglevel = info
nodaemon = false
childlogdir = /var/log/supervisor/
; the below section must remain in the config file for RPC
; (supervisorctl/web interface) to work, additional interfaces may be
; added by defining them in separate rpcinterface: sections
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=http://127.0.0.1:9001
; The [include] section can just contain the "files" setting. This
; setting can list multiple files (separated by whitespace or
; newlines). It can also contain wildcards. The filenames are
; interpreted as relative to this file. Included files *cannot*
; include files themselves.
[include]
files = conf.d/*.conf
@@ -0,0 +1,15 @@
[Unit]
Description=Cloudron App Health Monitor
OnFailure=crashnotifier@%n.service
StopWhenUnneeded=true
[Service]
Type=idle
WorkingDirectory=/home/yellowtent/box
Restart=always
ExecStart="/home/yellowtent/box/apphealthtask.js"
Environment="HOME=/home/yellowtent" "USER=yellowtent" "DEBUG=box*,connect-lastmile" "BOX_ENV=cloudron" "NODE_ENV=production"
KillMode=process
User=yellowtent
Group=yellowtent
MemoryLimit=50M
+17
View File
@@ -0,0 +1,17 @@
[Unit]
Description=Cloudron Admin
OnFailure=crashnotifier@%n.service
StopWhenUnneeded=true
[Service]
Type=idle
WorkingDirectory=/home/yellowtent/box
Restart=always
ExecStart="/home/yellowtent/box/app.js"
Environment="HOME=/home/yellowtent" "USER=yellowtent" "DEBUG=box*,connect-lastmile" "BOX_ENV=cloudron" "NODE_ENV=production"
KillMode=process
User=yellowtent
Group=yellowtent
MemoryLimit=200M
TimeoutStopSec=5s
+10
View File
@@ -0,0 +1,10 @@
[Unit]
Description=Cloudron Smart Cloud
Documentation=https://cloudron.io/documentation.html
StopWhenUnneeded=true
Requires=apphealthtask.service box.service janitor.service oauthproxy.service
After=apphealthtask.service box.service janitor.service oauthproxy.service
# AllowIsolate=yes
[Install]
WantedBy=multi-user.target
@@ -0,0 +1,15 @@
# http://northernlightlabs.se/systemd.status.mail.on.unit.failure
[Unit]
Description=Cloudron Crash Notifier for %i
# otherwise, systemd will kill this unit immediately as nobody requires it
StopWhenUnneeded=false
[Service]
Type=idle
WorkingDirectory=/home/yellowtent/box
ExecStart="/home/yellowtent/box/crashnotifier.js" %I
Environment="HOME=/home/yellowtent" "USER=yellowtent" "DEBUG=box*,connect-lastmile" "BOX_ENV=cloudron" "NODE_ENV=production"
KillMode=process
User=yellowtent
Group=yellowtent
+16
View File
@@ -0,0 +1,16 @@
[Unit]
Description=Cloudron Janitor
OnFailure=crashnotifier@%n.service
StopWhenUnneeded=true
[Service]
Type=idle
WorkingDirectory=/home/yellowtent/box
Restart=always
ExecStart="/home/yellowtent/box/janitor.js"
Environment="HOME=/home/yellowtent" "USER=yellowtent" "DEBUG=box*,connect-lastmile" "BOX_ENV=cloudron" "NODE_ENV=production"
KillMode=process
User=yellowtent
Group=yellowtent
MemoryLimit=50M
@@ -0,0 +1,16 @@
[Unit]
Description=Cloudron OAuth Proxy Service
OnFailure=crashnotifier@%n.service
StopWhenUnneeded=true
[Service]
Type=idle
WorkingDirectory=/home/yellowtent/box
Restart=always
ExecStart="/home/yellowtent/box/oauthproxy.js"
Environment="HOME=/home/yellowtent" "USER=yellowtent" "DEBUG=box*,connect-lastmile" "BOX_ENV=cloudron" "NODE_ENV=production"
KillMode=process
User=yellowtent
Group=yellowtent
MemoryLimit=50M
+3 -15
View File
@@ -166,22 +166,10 @@ ADMIN_SCOPES="root,developer,profile,users,apps,settings,roleUser"
mysql -u root -p${mysql_root_password} \
-e "REPLACE INTO clients (id, appId, clientSecret, redirectURI, scope) VALUES (\"cid-test\", \"test\", \"secret-test\", \"http://127.0.0.1:5000\", \"${ADMIN_SCOPES}\")" box
set_progress "80" "Reloading supervisor"
# looks like restarting supervisor completely is the only way to reload it
service supervisor stop || true
set_progress "80" "Starting Cloudron"
systemctl start cloudron.target
echo -n "Waiting for supervisord to stop"
while test -e "/var/run/supervisord.pid" && kill -0 `cat /var/run/supervisord.pid`; do
echo -n "."
sleep 1
done
echo ""
echo "Starting supervisor"
service supervisor start
sleep 2 # give supervisor sometime to start the processes
sleep 2 # give systemd sometime to start the processes
set_progress "85" "Reloading nginx"
nginx -s reload
+2 -10
View File
@@ -2,14 +2,6 @@
set -eu -o pipefail
echo "Stopping box code"
service supervisor stop || true
echo -n "Waiting for supervisord to stop"
while test -e "/var/run/supervisord.pid" && kill -0 `cat /var/run/supervisord.pid`; do
echo -n "."
sleep 1
done
echo ""
echo "Stopping cloudron"
systemctl stop cloudron.target
+21 -5
View File
@@ -9,6 +9,7 @@ function Locker() {
this._operation = null;
this._timestamp = null;
this._watcherId = -1;
this._lockDepth = 0; // recursive locks
}
util.inherits(Locker, EventEmitter);
@@ -24,6 +25,7 @@ Locker.prototype.lock = function (operation) {
if (this._operation !== null) return new Error('Already locked for ' + this._operation);
this._operation = operation;
++this._lockDepth;
this._timestamp = new Date();
var that = this;
this._watcherId = setInterval(function () { debug('Lock unreleased %s', that._operation); }, 1000 * 60 * 5);
@@ -35,17 +37,31 @@ Locker.prototype.lock = function (operation) {
return null;
};
Locker.prototype.recursiveLock = function (operation) {
if (this._operation === operation) {
++this._lockDepth;
debug('Re-acquired : %s Depth : %s', this._operation, this._lockDepth);
return null;
}
return this.lock(operation);
};
Locker.prototype.unlock = function (operation) {
assert.strictEqual(typeof operation, 'string');
if (this._operation !== operation) throw new Error('Mismatched unlock. Current lock is for ' + this._operation); // throw because this is a programming error
debug('Released : %s', this._operation);
if (--this._lockDepth === 0) {
debug('Released : %s', this._operation);
this._operation = null;
this._timestamp = null;
clearInterval(this._watcherId);
this._watcherId = -1;
this._operation = null;
this._timestamp = null;
clearInterval(this._watcherId);
this._watcherId = -1;
} else {
debug('Recursive lock released : %s. Depth : %s', this._operation, this._lockDepth);
}
this.emit('unlocked', operation);
+2 -2
View File
@@ -21,7 +21,7 @@ readonly program_name=$1
echo "${program_name}.log"
echo "-------------------"
tail --lines=100 /var/log/supervisor/${program_name}.log
journalctl --no-pager -u ${program_name} -n 100
echo
echo
echo "dmesg"
@@ -31,7 +31,7 @@ echo
echo
echo "docker"
echo "------"
tail --lines=100 /var/log/upstart/docker.log
journalctl --no-pager -u docker -n 50
echo
echo
-6
View File
@@ -12,12 +12,6 @@ if [[ $# == 1 && "$1" == "--check" ]]; then
exit 0
fi
if [[ "${OSTYPE}" == "darwin"* ]]; then
# On Mac, brew installs supervisor in /usr/local/bin
export PATH=$PATH:/usr/local/bin
fi
if [[ "${BOX_ENV}" == "cloudron" ]]; then
nginx -s reload
fi
+4 -6
View File
@@ -17,10 +17,7 @@ var appdb = require('./appdb.js'),
var gActiveTasks = { };
var gPendingTasks = [ ];
// Task concurrency is 1 for two reasons:
// 1. The backup scripts (app and box) turn off swap after finish disregarding other backup processes
// 2. apptask getFreePort has race with multiprocess
var TASK_CONCURRENCY = 1;
var TASK_CONCURRENCY = 5;
var NOOP_CALLBACK = function (error) { console.error(error); };
function initialize(callback) {
@@ -54,7 +51,8 @@ function uninitialize(callback) {
function startNextTask() {
if (gPendingTasks.length === 0) return;
assert.strictEqual(Object.keys(gActiveTasks).length, 0); // since we allow only one task at a time
assert(Object.keys(gActiveTasks).length < TASK_CONCURRENCY);
startAppTask(gPendingTasks.shift());
}
@@ -63,7 +61,7 @@ function startAppTask(appId) {
assert.strictEqual(typeof appId, 'string');
assert(!(appId in gActiveTasks));
var lockError = locker.lock(locker.OP_APPTASK);
var lockError = locker.recursiveLock(locker.OP_APPTASK);
if (lockError || Object.keys(gActiveTasks).length >= TASK_CONCURRENCY) {
debug('Reached concurrency limit, queueing task for %s', appId);