diff --git a/src/apptaskmanager.js b/src/apptaskmanager.js index 911a73a05..a1df3c908 100644 --- a/src/apptaskmanager.js +++ b/src/apptaskmanager.js @@ -23,7 +23,7 @@ const TASK_CONCURRENCY = 3; function waitText(lockOperation) { if (lockOperation === locker.OP_BOX_UPDATE) return 'Waiting for Cloudron to finish updating. See the Settings view'; - if (lockOperation === locker.OP_PLATFORM_START) return 'Waiting for Cloudron to initialize'; + if (lockOperation === locker.OP_INFRA_START) return 'Waiting for Platform Services to start. See the Services view'; if (lockOperation === locker.OP_FULL_BACKUP) return 'Waiting for Cloudron to finish backup. See the Backups view'; return ''; // cannot happen diff --git a/src/cloudron.js b/src/cloudron.js index a1538dca7..fc5c3d767 100644 --- a/src/cloudron.js +++ b/src/cloudron.js @@ -1,14 +1,9 @@ 'use strict'; exports = module.exports = { - initialize, - uninitialize, - getStatus, getConfig, - onActivated, - setupDnsAndCert, prepareDashboardDomain, @@ -25,7 +20,6 @@ exports = module.exports = { const apps = require('./apps.js'), appstore = require('./appstore.js'), assert = require('assert'), - AuditSource = require('./auditsource.js'), BoxError = require('./boxerror.js'), branding = require('./branding.js'), constants = require('./constants.js'), @@ -33,106 +27,20 @@ const apps = require('./apps.js'), debug = require('debug')('box:cloudron'), dashboard = require('./dashboard.js'), dns = require('./dns.js'), - dockerProxy = require('./dockerproxy.js'), eventlog = require('./eventlog.js'), mailServer = require('./mailserver.js'), moment = require('moment-timezone'), network = require('./network.js'), oidc = require('./oidc.js'), - paths = require('./paths.js'), - platform = require('./platform.js'), reverseProxy = require('./reverseproxy.js'), safe = require('safetydance'), services = require('./services.js'), settings = require('./settings.js'), system = require('./system.js'), tasks = require('./tasks.js'), - timers = 
require('timers/promises'), translation = require('./translation.js'), users = require('./users.js'); -async function initialize() { - safe(runStartupTasks(), { debug }); // background - - await notifyUpdate(); -} - -async function uninitialize() { - await cron.stopJobs(); - await dockerProxy.stop(); - await platform.stopAllTasks(); -} - -async function onActivated(options) { - assert.strictEqual(typeof options, 'object'); - - debug('onActivated: running post activation tasks'); - - // Starting the platform after a user is available means: - // 1. mail bounces can now be sent to the cloudron owner - // 2. the restore code path can run without sudo (since mail/ is non-root) - await platform.start(options); - await cron.startJobs(); - await dockerProxy.start(); // this relies on the 'cloudron' docker network interface to be available - await oidc.start(); // this requires dashboardFqdn to be set - - // disable responding to api calls via IP to not leak domain info. this is carefully placed as the last item, so it buys - // the UI some time to query the dashboard domain in the restore code path - await timers.setTimeout(30000); - await reverseProxy.writeDefaultConfig({ activated :true }); -} - -async function notifyUpdate() { - const version = safe.fs.readFileSync(paths.VERSION_FILE, 'utf8'); - if (version === constants.VERSION) return; - - if (!version) { - await eventlog.add(eventlog.ACTION_INSTALL_FINISH, AuditSource.CRON, { version: constants.VERSION }); - } else { - await eventlog.add(eventlog.ACTION_UPDATE_FINISH, AuditSource.CRON, { errorMessage: '', oldVersion: version || 'dev', newVersion: constants.VERSION }); - const [error] = await safe(tasks.setCompletedByType(tasks.TASK_UPDATE, { error: null })); - if (error && error.reason !== BoxError.NOT_FOUND) throw error; // when hotfixing, task may not exist - } - - safe.fs.writeFileSync(paths.VERSION_FILE, constants.VERSION, 'utf8'); -} - -// each of these tasks can fail. 
we will add some routes to fix/re-run them -async function runStartupTasks() { - const tasks = []; - - // stop all the systemd tasks - tasks.push(platform.stopAllTasks); - - // always generate webadmin config since we have no versioning mechanism for the ejs - tasks.push(async function () { - if (!await dashboard.getDomain()) return; - - await reverseProxy.writeDashboardConfig(await dashboard.getDomain()); - }); - - tasks.push(async function () { - // check activation state and start the platform - const activated = await users.isActivated(); - - // configure nginx to be reachable by IP when not activated. for the moment, the IP based redirect exists even after domain is setup - // just in case user forgot or some network error happenned in the middle (then browser refresh takes you to activation page) - // we remove the config as a simple security measure to not expose IP <-> domain - if (!activated) { - debug('runStartupTasks: not activated. generating IP based redirection config'); - return await reverseProxy.writeDefaultConfig({ activated: false }); - } - - await onActivated({}); - }); - - // we used to run tasks in parallel but simultaneous nginx reloads was causing issues - for (let i = 0; i < tasks.length; i++) { - const [error] = await safe(tasks[i]()); - if (error) debug(`Startup task at index ${i} failed: ${error.message} ${error.stack}`); - } -} - async function getStatus() { return { version: constants.VERSION, diff --git a/src/locker.js b/src/locker.js index ede417256..8f817c6b6 100644 --- a/src/locker.js +++ b/src/locker.js @@ -16,7 +16,7 @@ util.inherits(Locker, EventEmitter); // these are mutually exclusive operations Locker.prototype.OP_BOX_UPDATE = 'box_update'; -Locker.prototype.OP_PLATFORM_START = 'platform_start'; +Locker.prototype.OP_INFRA_START = 'infra_start'; Locker.prototype.OP_FULL_BACKUP = 'full_backup'; Locker.prototype.OP_APPTASK = 'apptask'; diff --git a/src/platform.js b/src/platform.js index 5460a077e..e50f3aed6 100644 --- 
a/src/platform.js +++ b/src/platform.js @@ -1,8 +1,10 @@ 'use strict'; exports = module.exports = { - start, - stopAllTasks, + initialize, + uninitialize, + + onActivated, getStatus }; @@ -12,11 +14,16 @@ const apps = require('./apps.js'), AuditSource = require('./auditsource.js'), BoxError = require('./boxerror.js'), constants = require('./constants.js'), + cron = require('./cron.js'), + dashboard = require('./dashboard.js'), + database = require('./database.js'), debug = require('debug')('box:platform'), docker = require('./docker.js'), + dockerProxy = require('./dockerproxy.js'), fs = require('fs'), infra = require('./infra_version.js'), locker = require('./locker.js'), + oidc = require('./oidc.js'), paths = require('./paths.js'), reverseProxy = require('./reverseproxy.js'), safe = require('safetydance'), @@ -24,6 +31,8 @@ const apps = require('./apps.js'), shell = require('./shell.js'), tasks = require('./tasks.js'), timers = require('timers/promises'), + updater = require('./updater.js'), + users = require('./users.js'), volumes = require('./volumes.js'), _ = require('underscore'); @@ -33,70 +42,6 @@ function getStatus() { return { message: gStatusMessage }; } -async function start(options) { - if (process.env.BOX_ENV === 'test' && !process.env.TEST_CREATE_INFRA) return; - - debug('initializing platform'); - - let existingInfra = { version: 'none' }; - if (fs.existsSync(paths.INFRA_VERSION_FILE)) { - existingInfra = safe.JSON.parse(fs.readFileSync(paths.INFRA_VERSION_FILE, 'utf8')); - if (!existingInfra) existingInfra = { version: 'corrupt' }; - } - - // short-circuit for the restart case - if (_.isEqual(infra, existingInfra)) { - debug('platform is uptodate at version %s', infra.version); - await onPlatformReady(false /* !infraChanged */); - return; - } - - debug('Updating infrastructure from %s to %s', existingInfra.version, infra.version); - - const error = locker.lock(locker.OP_PLATFORM_START); - if (error) throw error; - - for (let attempt = 0; attempt < 
5; attempt++) { - try { - if (existingInfra.version !== infra.version) { - gStatusMessage = 'Removing containers for upgrade'; - await removeAllContainers(); - await createDockerNetwork(); - } - if (existingInfra.version === 'none') await volumes.mountAll(); // when restoring, mount all volumes - await markApps(existingInfra, options); // mark app state before we start addons. this gives the db import logic a chance to mark an app as errored - gStatusMessage = 'Starting services, this can take a while'; - await services.startServices(existingInfra); - await fs.promises.writeFile(paths.INFRA_VERSION_FILE, JSON.stringify(infra, null, 4)); - break; - } catch (error) { - // for some reason, mysql arbitrary restarts making startup tasks fail. this makes the box update stuck - // LOST is when existing connection breaks. REFUSED is when new connection cannot connect at all - const retry = error.reason === BoxError.DATABASE_ERROR && (error.code === 'PROTOCOL_CONNECTION_LOST' || error.code === 'ECONNREFUSED'); - debug(`Failed to start services. retry=${retry} (attempt ${attempt}): ${error.message}`); - if (!retry) throw error; // refuse to start - await timers.setTimeout(10000); - } - } - - locker.unlock(locker.OP_PLATFORM_START); - - await onPlatformReady(true /* infraChanged */); -} - -async function stopAllTasks() { - await tasks.stopAllTasks(); -} - -async function onPlatformReady(infraChanged) { - debug(`onPlatformReady: platform is ready. 
infra changed: ${infraChanged}`); - gStatusMessage = 'Ready'; - - if (infraChanged) await safe(pruneInfraImages(), { debug }); // ignore error - - await apps.schedulePendingTasks(AuditSource.PLATFORM); -} - async function pruneInfraImages() { debug('pruneInfraImages: checking existing images'); @@ -142,13 +87,13 @@ async function removeAllContainers() { await shell.promises.exec('removeAllContainers', 'docker ps -qa --filter \'label=isCloudronManaged\' | xargs --no-run-if-empty docker rm -f'); } -async function markApps(existingInfra, options) { +async function markApps(existingInfra, restoreOptions) { assert.strictEqual(typeof existingInfra, 'object'); - assert.strictEqual(typeof options, 'object'); + assert.strictEqual(typeof restoreOptions, 'object'); if (existingInfra.version === 'none') { // cloudron is being restored from backup debug('markApps: restoring installed apps'); - await apps.restoreInstalledApps(options, AuditSource.PLATFORM); + await apps.restoreInstalledApps(restoreOptions, AuditSource.PLATFORM); } else if (existingInfra.version !== infra.version) { debug('markApps: reconfiguring installed apps'); reverseProxy.removeAppConfigs(); // should we change the cert location, nginx will not start @@ -169,3 +114,117 @@ async function markApps(existingInfra, options) { } } } + +async function onInfraReady(infraChanged) { + debug(`onInfraReady: platform is ready. 
infra changed: ${infraChanged}`); + gStatusMessage = 'Ready'; + + if (infraChanged) await safe(pruneInfraImages(), { debug }); // ignore error + + await apps.schedulePendingTasks(AuditSource.PLATFORM); +} + +async function startInfra(restoreOptions) { + assert.strictEqual(typeof restoreOptions, 'object'); + + if (process.env.BOX_ENV === 'test' && !process.env.TEST_CREATE_INFRA) return; + + debug('startInfra: checking infrastructure'); + + let existingInfra = { version: 'none' }; + if (fs.existsSync(paths.INFRA_VERSION_FILE)) { + existingInfra = safe.JSON.parse(fs.readFileSync(paths.INFRA_VERSION_FILE, 'utf8')); + if (!existingInfra) existingInfra = { version: 'corrupt' }; + } + + // short-circuit for the restart case + if (_.isEqual(infra, existingInfra)) { + debug('startInfra: infra is uptodate at version %s', infra.version); + await onInfraReady(false /* !infraChanged */); + return; + } + + debug(`startInfra: updating infrastructure from ${existingInfra.version} to ${infra.version}`); + + const error = locker.lock(locker.OP_INFRA_START); + if (error) throw error; + + for (let attempt = 0; attempt < 5; attempt++) { + try { + if (existingInfra.version !== infra.version) { + gStatusMessage = 'Removing containers for upgrade'; + await removeAllContainers(); + await createDockerNetwork(); + } + if (existingInfra.version === 'none') await volumes.mountAll(); // when restoring, mount all volumes + await markApps(existingInfra, restoreOptions); // mark app state before we start addons. this gives the db import logic a chance to mark an app as errored + gStatusMessage = 'Starting services, this can take a while'; + await services.startServices(existingInfra); + await fs.promises.writeFile(paths.INFRA_VERSION_FILE, JSON.stringify(infra, null, 4)); + break; + } catch (error) { + // for some reason, mysql arbitrarily restarts, making startup tasks fail. this leaves the box update stuck + // LOST is when existing connection breaks. 
REFUSED is when new connection cannot connect at all + const retry = error.reason === BoxError.DATABASE_ERROR && (error.code === 'PROTOCOL_CONNECTION_LOST' || error.code === 'ECONNREFUSED'); + debug(`startInfra: Failed to start services. retry=${retry} (attempt ${attempt}): ${error.message}`); + if (!retry) throw error; // refuse to start + await timers.setTimeout(10000); + } + } + + locker.unlock(locker.OP_INFRA_START); + + await onInfraReady(true /* infraChanged */); +} + +async function initialize() { + debug('initializing platform'); + + await database.initialize(); + await tasks.stopAllTasks(); + + // always generate webadmin config since we have no versioning mechanism for the ejs + const { domain:dashboardDomain } = await dashboard.getLocation(); + if (dashboardDomain) await safe(reverseProxy.writeDashboardConfig(dashboardDomain), { debug }); // ok to fail if no disk space + + // configure nginx to be reachable by IP when not activated. for the moment, the IP based redirect exists even after domain is set up + // just in case user forgot or some network error happened in the middle (then browser refresh takes you to activation page) + // we remove the config as a simple security measure to not expose IP <-> domain + const activated = await users.isActivated(); + if (!activated) { + debug('start: not activated. 
generating IP based redirection config'); + await safe(reverseProxy.writeDefaultConfig({ activated: false }), { debug }); // ok to fail if no disk space + } + + await updater.notifyUpdate(); + + if (await users.isActivated()) safe(onActivated({ skipDnsSetup: false }), { debug }); // run in background +} + +async function uninitialize() { + debug('uninitializing platform'); + + await cron.stopJobs(); + await dockerProxy.stop(); + await tasks.stopAllTasks(); + await database.uninitialize(); +} + +async function onActivated(restoreOptions) { + assert.strictEqual(typeof restoreOptions, 'object'); + + debug('onActivated: starting post activation services'); + + // Starting the infra after a user is available means: + // 1. mail bounces can now be sent to the cloudron owner + // 2. the restore code path can run without sudo (since mail/ is non-root) + await startInfra(restoreOptions); + await cron.startJobs(); + await dockerProxy.start(); // this relies on the 'cloudron' docker network interface to be available + await oidc.start(); // this requires dashboardFqdn to be set + + // disable responding to api calls via IP to not leak domain info. 
this is carefully placed as the last item, so it buys + // the UI some time to query the dashboard domain in the restore code path + await timers.setTimeout(30000); + await reverseProxy.writeDefaultConfig({ activated :true }); +} diff --git a/src/provision.js b/src/provision.js index 3fa68f94d..6d3c64458 100644 --- a/src/provision.js +++ b/src/provision.js @@ -22,6 +22,7 @@ const assert = require('assert'), mailServer = require('./mailserver.js'), mounts = require('./mounts.js'), network = require('./network.js'), + platform = require('./platform.js'), reverseProxy = require('./reverseproxy.js'), safe = require('safetydance'), semver = require('semver'), @@ -147,7 +148,7 @@ async function activate(username, password, email, displayName, ip, auditSource) await eventlog.add(eventlog.ACTION_ACTIVATE, auditSource, {}); - setImmediate(() => safe(cloudron.onActivated({}), { debug })); + setImmediate(() => safe(platform.onActivated({ skipDnsSetup: false }), { debug })); return { userId: ownerId, @@ -184,7 +185,7 @@ async function restoreTask(backupConfig, remotePath, ipv4Config, options, auditS await backups.setConfig(backupConfig); await eventlog.add(eventlog.ACTION_RESTORE, auditSource, { remotePath }); - setImmediate(() => safe(cloudron.onActivated(options), { debug })); + setImmediate(() => safe(platform.onActivated({ skipDnsSetup: options.skipDnsSetup }), { debug })); } catch (error) { gStatus.restore.errorMessage = error ? 
error.message : ''; } diff --git a/src/server.js b/src/server.js index 6fc1ad175..c0ab35935 100644 --- a/src/server.js +++ b/src/server.js @@ -6,14 +6,13 @@ exports = module.exports = { }; const assert = require('assert'), - cloudron = require('./cloudron.js'), constants = require('./constants.js'), - database = require('./database.js'), debug = require('debug')('box:server'), eventlog = require('./eventlog.js'), express = require('express'), http = require('http'), middleware = require('./middleware'), + platform = require('./platform.js'), routes = require('./routes/index.js'), safe = require('safetydance'), users = require('./users.js'), @@ -460,8 +459,7 @@ async function start() { debug(` Cloudron ${constants.VERSION} `); debug('=========================================='); - await database.initialize(); - await cloudron.initialize(); + await platform.initialize(); gHttpServer = await initializeExpressSync(); @@ -472,8 +470,7 @@ async function start() { async function stop() { if (!gHttpServer) return; - await cloudron.uninitialize(); - await database.uninitialize(); + await platform.uninitialize(); await util.promisify(gHttpServer.close.bind(gHttpServer))(); gHttpServer = null; diff --git a/src/updater.js b/src/updater.js index 44f441e94..e44346b95 100644 --- a/src/updater.js +++ b/src/updater.js @@ -5,11 +5,14 @@ exports = module.exports = { getAutoupdatePattern, updateToLatest, - update + update, + + notifyUpdate }; const apps = require('./apps.js'), assert = require('assert'), + AuditSource = require('./auditsource.js'), BoxError = require('./boxerror.js'), backups = require('./backups.js'), backuptask = require('./backuptask.js'), @@ -237,3 +240,18 @@ async function updateToLatest(options, auditSource) { return taskId; } + +async function notifyUpdate() { + const version = safe.fs.readFileSync(paths.VERSION_FILE, 'utf8'); + if (version === constants.VERSION) return; + + if (!version) { + await eventlog.add(eventlog.ACTION_INSTALL_FINISH, AuditSource.CRON, 
{ version: constants.VERSION }); + } else { + await eventlog.add(eventlog.ACTION_UPDATE_FINISH, AuditSource.CRON, { errorMessage: '', oldVersion: version || 'dev', newVersion: constants.VERSION }); + const [error] = await safe(tasks.setCompletedByType(tasks.TASK_UPDATE, { error: null })); + if (error && error.reason !== BoxError.NOT_FOUND) throw error; // when hotfixing, task may not exist + } + + safe.fs.writeFileSync(paths.VERSION_FILE, constants.VERSION, 'utf8'); +}