fb39aa32bb
https://forum.cloudron.io/topic/13408/update-to-cloudron-8.3-error We get a 'Task xx crashed with code null' notification. The crux of the issue is that we use KillMode=control-group, which sends SIGTERM to the box code and all the sudo processes in parallel. The box code then sees the sudo processes die and records the task as failed. To fix, we switch to KillMode=mixed. This gives the box code a chance to handle SIGTERM first: it cleans out its task list and kills the sudo processes itself.
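For illustration, a minimal sketch of the unit-file change described above; the [Service] section shown here is an assumption about the surrounding unit, not the actual box.service contents:

    [Service]
    # Before: KillMode=control-group. systemd sends SIGTERM to every process in
    # the unit's control group at once, so the sudo children die before the box
    # code can record their tasks as stopped.
    # After: SIGTERM goes to the main process only; the box code stops its task
    # list and kills the sudo children itself. Anything still alive when the
    # stop timeout expires receives SIGKILL.
    KillMode=mixed

This matches the cleanup in uninitialize() below, where tasks.stopAllTasks() reaps the sudo+task processes when the box code is stopped.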
'use strict';

exports = module.exports = {
    initialize,
    uninitialize,

    onActivated,
    onDashboardLocationSet,
    onDashboardLocationChanged,
    onMailServerLocationChanged,
    onMailServerIncomingDomainsChanged,

    getStatus
};

const apps = require('./apps.js'),
    appTaskManager = require('./apptaskmanager.js'),
    assert = require('assert'),
    AuditSource = require('./auditsource.js'),
    BoxError = require('./boxerror.js'),
    constants = require('./constants.js'),
    cron = require('./cron.js'),
    dashboard = require('./dashboard.js'),
    database = require('./database.js'),
    debug = require('debug')('box:platform'),
    dockerProxy = require('./dockerproxy.js'),
    fs = require('fs'),
    infra = require('./infra_version.js'),
    locks = require('./locks.js'),
    oidcServer = require('./oidcserver.js'),
    paths = require('./paths.js'),
    reverseProxy = require('./reverseproxy.js'),
    safe = require('safetydance'),
    services = require('./services.js'),
    shell = require('./shell.js')('platform'),
    tasks = require('./tasks.js'),
    timers = require('timers/promises'),
    updater = require('./updater.js'),
    users = require('./users.js'),
    volumes = require('./volumes.js'),
    _ = require('./underscore.js');

let gStatusMessage = 'Initializing';

function getStatus() {
    return { message: gStatusMessage };
}

async function pruneVolumes() {
    debug('pruneVolumes: remove all unused local volumes');

    const [error] = await safe(shell.spawn('docker', [ 'volume', 'prune', '--all', '--force' ], {}));
    if (error) console.log(`Error pruning volumes: ${error.message}`);
}

async function createDockerNetwork() {
    debug('createDockerNetwork: recreating docker network');

    await shell.spawn('docker', ['network', 'rm', '-f', 'cloudron'], {});
    // the --ipv6 option works even if ipv6 is disabled. fd00 is the IPv6 ULA prefix
    await shell.spawn('docker', ['network', 'create', `--subnet=${constants.DOCKER_IPv4_SUBNET}`, `--ip-range=${constants.DOCKER_IPv4_RANGE}`,
        `--gateway=${constants.DOCKER_IPv4_GATEWAY}`, '--ipv6', `--subnet=${constants.DOCKER_IPv6_SUBNET}`, 'cloudron'], {});
}

async function removeAllContainers() {
    debug('removeAllContainers: removing all containers for infra upgrade');

    const output = await shell.spawn('docker', ['ps', '-qa', '--filter', 'label=isCloudronManaged'], { encoding: 'utf8' });
    if (!output) return;

    for (const containerId of output.trim().split('\n')) {
        debug(`removeAllContainers: stopping and removing ${containerId}`);
        await shell.spawn('docker', ['stop', containerId], {});
        await shell.spawn('docker', ['rm', '-f', containerId], {});
    }
}

async function markApps(existingInfra, restoreOptions) {
    assert.strictEqual(typeof existingInfra, 'object');
    assert.strictEqual(typeof restoreOptions, 'object');

    if (existingInfra.version === 'none') { // cloudron is being restored from backup
        debug('markApps: restoring apps');
        await apps.restoreApps(await apps.list(), restoreOptions, AuditSource.PLATFORM);
    } else if (existingInfra.version !== infra.version) {
        debug('markApps: reconfiguring apps');
        reverseProxy.removeAppConfigs(); // should we change the cert location, nginx will not start
        await apps.configureApps(await apps.list(), { scheduleNow: false }, AuditSource.PLATFORM); // we will schedule it when infra is ready
    } else {
        const changedAddons = [];
        if (infra.images.mysql !== existingInfra.images.mysql) changedAddons.push('mysql');
        if (infra.images.postgresql !== existingInfra.images.postgresql) changedAddons.push('postgresql');
        if (infra.images.mongodb !== existingInfra.images.mongodb) changedAddons.push('mongodb');
        if (infra.images.redis !== existingInfra.images.redis) changedAddons.push('redis');

        if (changedAddons.length) {
            // restart apps if a docker image changes, since the IP changes and any "persistent" connections fail
            debug(`markApps: changedAddons: ${JSON.stringify(changedAddons)}`);
            await apps.restartAppsUsingAddons(changedAddons, AuditSource.PLATFORM);
        } else {
            debug('markApps: apps are already up-to-date');
        }
    }
}

async function onInfraReady(infraChanged) {
    debug(`onInfraReady: platform is ready. infra changed: ${infraChanged}`);
    gStatusMessage = 'Ready';

    if (infraChanged) await safe(pruneVolumes(), { debug }); // ignore error
    await apps.schedulePendingTasks(AuditSource.PLATFORM);
    await appTaskManager.start();
}

async function startInfra(restoreOptions) {
    assert.strictEqual(typeof restoreOptions, 'object');

    if (constants.TEST && !process.env.TEST_CREATE_INFRA) return;

    debug('startInfra: checking infrastructure');

    let existingInfra = { version: 'none' };
    if (fs.existsSync(paths.INFRA_VERSION_FILE)) {
        existingInfra = safe.JSON.parse(fs.readFileSync(paths.INFRA_VERSION_FILE, 'utf8'));
        if (!existingInfra) existingInfra = { version: 'corrupt' };
    }

    // short-circuit for the restart case
    if (_.isEqual(infra, existingInfra)) {
        debug('startInfra: infra is up-to-date at version %s', infra.version);
        await onInfraReady(false /* !infraChanged */);
        return;
    }

    debug(`startInfra: updating infrastructure from ${existingInfra.version} to ${infra.version}`);

    for (let attempt = 0; attempt < 5; attempt++) {
        try {
            await markApps(existingInfra, restoreOptions); // mark app state before we start addons. this gives the db import logic a chance to mark an app as errored
            gStatusMessage = 'Updating platform, this can take a while';
            if (existingInfra.version !== infra.version) {
                gStatusMessage = 'Removing containers for upgrade';
                await removeAllContainers();
                await createDockerNetwork();
            }
            if (existingInfra.version === 'none') await volumes.mountAll(); // when restoring, mount all volumes
            gStatusMessage = 'Starting services, this can take a while';
            await services.startServices(existingInfra);
            await fs.promises.writeFile(paths.INFRA_VERSION_FILE, JSON.stringify(infra, null, 4));
            break;
        } catch (error) {
            // for some reason, mysql arbitrarily restarts, making startup tasks fail. this leaves the box update stuck
            // LOST is when an existing connection breaks. REFUSED is when a new connection cannot be made at all
            const retry = error.reason === BoxError.DATABASE_ERROR && (error.code === 'PROTOCOL_CONNECTION_LOST' || error.code === 'ECONNREFUSED');
            debug(`startInfra: Failed to start services. retry=${retry} (attempt ${attempt}): ${error.message}`);
            if (!retry) throw error; // refuse to start
            await timers.setTimeout(10000);
        }
    }

    await onInfraReady(true /* infraChanged */);
}

async function initialize() {
    debug('initialize: start platform');

    await database.initialize();
    await tasks.stopAllTasks(); // when box code crashes, systemd will clean up the control-group but not the tasks
    await locks.releaseAll();

    // always generate webadmin config since we have no versioning mechanism for the ejs
    const dashboardLocation = await dashboard.getLocation();
    if (dashboardLocation.domain) await onDashboardLocationSet(dashboardLocation.subdomain, dashboardLocation.domain);

    // configure nginx to be reachable by IP when not activated. for the moment, the IP based redirect exists even after the domain is set up,
    // just in case the user forgot or some network error happened in the middle (then a browser refresh takes you to the activation page).
    // we remove the config as a simple security measure to not expose the IP <-> domain mapping
    const activated = await users.isActivated();
    if (!activated) {
        debug('initialize: not activated. generating IP based redirection config');
        await safe(reverseProxy.writeDefaultConfig({ activated: false }), { debug }); // ok to fail if no disk space
    }

    await updater.notifyUpdate();

    if (await users.isActivated()) safe(onActivated({ skipDnsSetup: false }), { debug }); // run in background
}

async function uninitialize() {
    debug('uninitializing platform');

    if (await users.isActivated()) await onDeactivated();

    await tasks.stopAllTasks(); // when box code is stopped/restarted, we get a chance to cleanup all the sudo+tasks
    await database.uninitialize();
}

async function onActivated(restoreOptions) {
    assert.strictEqual(typeof restoreOptions, 'object');

    debug('onActivated: starting post activation services');

    // Starting the infra after a user is available means:
    // 1. mail bounces can now be sent to the cloudron owner
    // 2. the restore code path can run without sudo (since mail/ is non-root)
    await startInfra(restoreOptions);
    await cron.startJobs();
    await dockerProxy.start(); // this relies on the 'cloudron' docker network interface to be available

    // disable responding to api calls via IP to not leak domain info. this is carefully placed as the last item, so it buys
    // the UI some time to query the dashboard domain in the restore code path
    if (!constants.TEST) await timers.setTimeout(30000);
    await reverseProxy.writeDefaultConfig({ activated: true });

    debug('onActivated: finished');
}

async function onDeactivated() {
    debug('onDeactivated: stopping post activation services');

    await cron.stopJobs();
    await dockerProxy.stop();
    await oidcServer.stop();
}

async function onDashboardLocationSet(subdomain, domain) {
    assert.strictEqual(typeof subdomain, 'string');
    assert.strictEqual(typeof domain, 'string');

    await safe(reverseProxy.writeDashboardConfig(subdomain, domain), { debug }); // ok to fail if no disk space
    await oidcServer.stop();
    await oidcServer.start();
}

async function onDashboardLocationChanged(auditSource) {
    assert.strictEqual(typeof auditSource, 'object');

    // mark all apps to be reconfigured, all have ExtraHosts injected
    const [, installedApps] = await safe(apps.list());
    await safe(apps.configureApps(installedApps, { scheduleNow: true }, auditSource), { debug });

    await safe(services.rebuildService('turn', auditSource), { debug }); // to update the realm variable
}

async function onMailServerLocationChanged(auditSource) {
    assert.strictEqual(typeof auditSource, 'object');

    // mark apps using email addon to be reconfigured
    const [, installedApps] = await safe(apps.list());
    const appsUsingEmail = installedApps.filter((a) => !!a.manifest.addons?.email || a.manifest.addons?.sendmail?.requiresValidCertificate);
    await safe(apps.configureApps(appsUsingEmail, { scheduleNow: true }, auditSource), { debug });
}

async function onMailServerIncomingDomainsChanged(auditSource) {
    assert.strictEqual(typeof auditSource, 'object');

    // mark apps using email addon to be reconfigured
    const [, installedApps] = await safe(apps.list());
    const appsUsingEmail = installedApps.filter((a) => !!a.manifest.addons?.email);
    await safe(apps.configureApps(appsUsingEmail, { scheduleNow: true }, auditSource), { debug });
}