2016-05-24 09:40:26 -07:00
'use strict';

// Platform module: orchestrates infrastructure startup/teardown and reacts to
// dashboard and mail-server location/domain changes. Lifecycle entry points
// (initialize/uninitialize/onActivated) plus change-notification hooks.
exports = module.exports = {
    initialize,
    uninitialize,

    onActivated,

    onDashboardLocationSet,
    onDashboardLocationChanged,

    onMailServerLocationChanged,
    onMailServerIncomingDomainsChanged,

    getStatus
};
2021-01-21 11:31:35 -08:00
const apps = require ( './apps.js' ) ,
2024-12-09 14:36:19 +01:00
appTaskManager = require ( './apptaskmanager.js' ) ,
2025-08-14 11:17:38 +05:30
assert = require ( 'node:assert' ) ,
2021-11-17 10:33:28 -08:00
AuditSource = require ( './auditsource.js' ) ,
2021-11-02 22:30:38 -07:00
BoxError = require ( './boxerror.js' ) ,
2023-02-21 12:03:58 +01:00
constants = require ( './constants.js' ) ,
2023-08-12 19:28:07 +05:30
cron = require ( './cron.js' ) ,
dashboard = require ( './dashboard.js' ) ,
database = require ( './database.js' ) ,
2016-05-24 09:40:26 -07:00
debug = require ( 'debug' ) ( 'box:platform' ) ,
2023-08-12 19:28:07 +05:30
dockerProxy = require ( './dockerproxy.js' ) ,
2025-08-14 11:17:38 +05:30
fs = require ( 'node:fs' ) ,
2016-05-24 13:10:18 -07:00
infra = require ( './infra_version.js' ) ,
remove global lock
Currently, the update/apptask/fullbackup/platformstart take a
global lock and cannot run in parallel. This causes situations
where when a user tries to trigger an apptask, it says "waiting for
backup to finish..." etc
The solution is to let them run in parallel. We need a lock at the
app level as app operations running in parallel would be bad (tm).
In addition, the update task needs a lock just for the update part.
We also need multi-process locks. Running tasks as processes is core
to our "kill" strategy.
Various inter process locks were explored:
* node's IPC mechanism with process.send(). But this only works for direct node.js
children. taskworker is run via sudo and the IPC does not work.
* File lock using O_EXCL. Basic ideas to create lock files. While file creation
can be done atomically, it becomes complicated to clean up lock files when
the tasks crash. We need a way to know what locks were held by the crashing task.
flock and friends are not built-into node.js
* sqlite/redis were options but introduce additional deps
* Settled on MySQL based locking. Initial plan was to have row locks
or table locks. Each row is a kind of lock. While implementing, it was found that
we need many types of locks (and not just update lock and app locks). For example,
we need locks for each task type, so that only one task type is active at a time.
* Instead of rows, we can just lock table and have a json blob in it. This hit a road
block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e
when issuing two db.query() calls it might use two different connections from the pool. We have to
expose the connection, release connection etc.
* Next idea was atomic blob update of the blob checking if old blob was same. This approach,
was finally refined into a version field.
Phew!
2024-12-07 14:35:45 +01:00
locks = require ( './locks.js' ) ,
2025-06-11 22:00:09 +02:00
oidcServer = require ( './oidcserver.js' ) ,
2016-05-24 09:40:26 -07:00
paths = require ( './paths.js' ) ,
2018-01-30 12:23:27 -08:00
reverseProxy = require ( './reverseproxy.js' ) ,
2016-05-24 13:10:18 -07:00
safe = require ( 'safetydance' ) ,
2021-01-21 11:31:35 -08:00
services = require ( './services.js' ) ,
2024-10-14 19:10:31 +02:00
shell = require ( './shell.js' ) ( 'platform' ) ,
2019-08-28 15:00:55 -07:00
tasks = require ( './tasks.js' ) ,
2023-05-14 10:53:50 +02:00
timers = require ( 'timers/promises' ) ,
2023-08-12 19:28:07 +05:30
updater = require ( './updater.js' ) ,
users = require ( './users.js' ) ,
2021-09-28 11:51:01 -07:00
volumes = require ( './volumes.js' ) ,
2025-02-13 14:03:25 +01:00
_ = require ( './underscore.js' ) ;
2016-05-24 09:40:26 -07:00
2022-11-30 19:54:32 +01:00
// Human-readable platform status. Advanced through the startup phases by
// startInfra() and set to the terminal 'Ready' value in onInfraReady().
let gStatusMessage = 'Initializing';

// Returns the current status message together with a derived readiness flag.
function getStatus() {
    const message = gStatusMessage;
    return { message, isReady: message === 'Ready' };
}
2024-04-04 11:36:04 +02:00
// Removes all unused local docker volumes to reclaim disk space after an
// infra change. Best-effort: errors are logged via debug, never propagated.
async function pruneVolumes() {
    debug('pruneVolumes: remove all unused local volumes');

    const [error] = await safe(shell.spawn('docker', ['volume', 'prune', '--all', '--force'], { encoding: 'utf8' }));
    if (error) debug(`pruneVolumes: error pruning volumes: ${error.message}`); // fix: was error.mesage, which always logged 'undefined'
}
2022-02-09 17:28:46 -08:00
// Recreates the 'cloudron' docker network from scratch with dual-stack
// (IPv4 + IPv6 ULA) addressing taken from constants.
async function createDockerNetwork() {
    debug('createDockerNetwork: recreating docker network');

    await shell.spawn('docker', ['network', 'rm', '-f', 'cloudron'], {});

    // the --ipv6 option works even if ipv6 is disabled. fd00 is IPv6 ULA
    const createArgs = [
        'network', 'create',
        `--subnet=${constants.DOCKER_IPv4_SUBNET}`,
        `--ip-range=${constants.DOCKER_IPv4_RANGE}`,
        `--gateway=${constants.DOCKER_IPv4_GATEWAY}`,
        '--ipv6',
        `--subnet=${constants.DOCKER_IPv6_SUBNET}`,
        'cloudron'
    ];
    await shell.spawn('docker', createArgs, { encoding: 'utf8' });
}
2021-09-07 09:57:49 -07:00
// Stops and removes every container labeled isCloudronManaged. Used before an
// infra upgrade so services/apps are recreated against the new images/network.
async function removeAllContainers() {
    debug('removeAllContainers: removing all containers for infra upgrade');

    const output = await shell.spawn('docker', ['ps', '-qa', '--filter', 'label=isCloudronManaged'], { encoding: 'utf8' });
    if (!output) return;

    // filter(Boolean) drops empty entries (e.g. when output is only whitespace/newlines),
    // which would otherwise make us run `docker stop ''` and fail
    for (const containerId of output.trim().split('\n').filter(Boolean)) {
        debug(`removeAllContainers: stopping and removing ${containerId}`);
        await shell.spawn('docker', ['stop', containerId], { encoding: 'utf8' });
        await shell.spawn('docker', ['rm', '-f', containerId], { encoding: 'utf8' });
    }
}
2016-05-24 10:58:18 -07:00
2023-08-12 19:28:07 +05:30
// Decides what has to happen to the installed apps given the infra version
// transition: restore them (fresh restore), reconfigure them (infra version
// bump) or restart only those using addons whose docker image changed.
async function markApps(existingInfra, restoreOptions) {
    assert.strictEqual(typeof existingInfra, 'object');
    assert.strictEqual(typeof restoreOptions, 'object'); // { backupSite, skipDnsSetup }

    if (existingInfra.version === 'none') { // cloudron is being restored from backup
        debug('markApps: restoring apps');
        await apps.restoreApps(await apps.list(), restoreOptions, AuditSource.PLATFORM);
        return;
    }

    if (existingInfra.version !== infra.version) {
        debug('markApps: reconfiguring apps');
        reverseProxy.removeAppConfigs(); // should we change the cert location, nginx will not start
        await apps.configureApps(await apps.list(), { scheduleNow: false }, AuditSource.PLATFORM); // we will schedule it when infra is ready
        return;
    }

    // same infra version: check whether any addon images changed
    const changedAddons = ['mysql', 'postgresql', 'mongodb', 'redis']
        .filter((addon) => infra.images[addon] !== existingInfra.images[addon]);

    if (changedAddons.length === 0) {
        debug('markApps: apps are already uptodate');
        return;
    }

    // restart apps if docker image changes since the IP changes and any "persistent" connections fail
    debug(`markApps: changedAddons: ${JSON.stringify(changedAddons)}`);
    await apps.restartAppsUsingAddons(changedAddons, AuditSource.PLATFORM);
}
2023-08-12 19:28:07 +05:30
// Final step of platform startup: flips the status to 'Ready', prunes stale
// docker volumes when the infra changed (best-effort) and resumes app task
// processing that was held back while infra was starting.
// infraChanged - boolean, true when startInfra actually updated the infra.
async function onInfraReady(infraChanged) {
    debug(`onInfraReady: platform is ready. infra changed: ${infraChanged}`);

    gStatusMessage = 'Ready'; // getStatus() now reports isReady === true

    if (infraChanged) await safe(pruneVolumes(), { debug }); // ignore error

    await apps.schedulePendingTasks(AuditSource.PLATFORM);
    await appTaskManager.start();
}
// Brings the services/container infrastructure up to the version described by
// infra_version.js, retrying transient database failures. The previously
// deployed version is persisted in INFRA_VERSION_FILE; 'none' means a fresh
// restore, 'corrupt' means the file existed but could not be parsed.
// restoreOptions - { backupSite, skipDnsSetup }, forwarded to markApps().
async function startInfra(restoreOptions) {
    assert.strictEqual(typeof restoreOptions, 'object'); // { backupSite, skipDnsSetup }

    // tests skip infra creation unless explicitly requested
    if (constants.TEST && !process.env.TEST_CREATE_INFRA) return;

    debug('startInfra: checking infrastructure');

    let existingInfra = { version: 'none' };
    if (fs.existsSync(paths.INFRA_VERSION_FILE)) {
        existingInfra = safe.JSON.parse(fs.readFileSync(paths.INFRA_VERSION_FILE, 'utf8'));
        if (!existingInfra) existingInfra = { version: 'corrupt' };
    }

    // short-circuit for the restart case
    if (_.isEqual(infra, existingInfra)) {
        debug('startInfra: infra is uptodate at version %s', infra.version);
        await onInfraReady(false /* !infraChanged */);
        return;
    }

    debug(`startInfra: updating infrastructure from ${existingInfra.version} to ${infra.version}`);

    for (let attempt = 0; attempt < 5; attempt++) {
        try {
            await markApps(existingInfra, restoreOptions); // mark app state before we start addons. this gives the db import logic a chance to mark an app as errored
            gStatusMessage = 'Updating platform, this can take a while';
            if (existingInfra.version !== infra.version) {
                gStatusMessage = 'Removing containers for upgrade';
                await removeAllContainers();
                await createDockerNetwork();
            }
            if (existingInfra.version === 'none') await volumes.mountAll(); // when restoring, mount all volumes
            gStatusMessage = 'Starting services, this can take a while';
            await services.startServices(existingInfra);
            // persist the new infra version only after services started successfully
            await fs.promises.writeFile(paths.INFRA_VERSION_FILE, JSON.stringify(infra, null, 4));
            break;
        } catch (error) {
            // for some reason, mysql arbitrary restarts making startup tasks fail. this makes the box update stuck
            // LOST is when existing connection breaks. REFUSED is when new connection cannot connect at all
            const retry = error.reason === BoxError.DATABASE_ERROR && (error.code === 'PROTOCOL_CONNECTION_LOST' || error.code === 'ECONNREFUSED');
            debug(`startInfra: Failed to start services. retry=${retry} (attempt ${attempt}): ${error.message}`);
            if (!retry) throw error; // refuse to start
            await timers.setTimeout(10000);
        }
    }
    // NOTE(review): if all 5 retryable attempts fail, the loop falls through and we
    // still declare infra ready below — confirm this fall-through is intended

    await onInfraReady(true /* infraChanged */);
}
// Platform startup entry point. Opens the database, cleans up leftovers from
// a previous run (stale tasks, held locks), writes the dashboard/nginx
// configs and, if the cloudron is already activated, kicks off the
// post-activation startup (onActivated) in the background.
async function initialize() {
    debug('initialize: start platform');

    await database.initialize();
    await tasks.stopAllTasks(); // when box code crashes, systemd will clean up the control-group but not the tasks
    await locks.releaseAll(); // locks are db-backed and survive a crash; release any held by dead processes

    // always generate webadmin config since we have no versioning mechanism for the ejs
    const dashboardLocation = await dashboard.getLocation();
    if (dashboardLocation.domain) await onDashboardLocationSet(dashboardLocation.subdomain, dashboardLocation.domain);

    // configure nginx to be reachable by IP when not activated. for the moment, the IP based redirect exists even after domain is setup
    // just in case user forgot or some network error happened in the middle (then browser refresh takes you to activation page)
    // we remove the config as a simple security measure to not expose IP <-> domain
    const activated = await users.isActivated();
    if (!activated) {
        debug('initialize: not activated. generating IP based redirection config');
        await safe(reverseProxy.writeDefaultConfig({ activated: false }), { debug }); // ok to fail if no disk space
    }

    await updater.notifyBoxUpdate();

    if (await users.isActivated()) safe(onActivated({ skipDnsSetup: false }), { debug }); // run in background
}
// Platform teardown, mirror of initialize(): stops post-activation services
// (when activated), kills any running tasks and closes the database last.
async function uninitialize() {
    debug('uninitializing platform');

    if (await users.isActivated()) await onDeactivated();
    await tasks.stopAllTasks(); // when box code is stopped/restarted, we get a chance to cleanup all the sudo+tasks
    await database.uninitialize();
}
// Starts everything that requires an activated cloudron (a user exists).
// Called in the background from initialize() when already activated.
// restoreOptions - { backupSite, skipDnsSetup }, forwarded to startInfra().
async function onActivated(restoreOptions) {
    assert.strictEqual(typeof restoreOptions, 'object'); // { backupSite, skipDnsSetup }

    debug('onActivated: starting post activation services');

    // Starting the infra after a user is available means:
    // 1. mail bounces can now be sent to the cloudron owner
    // 2. the restore code path can run without sudo (since mail/ is non-root)
    await startInfra(restoreOptions);
    await cron.startJobs();
    await dockerProxy.start(); // this relies on the 'cloudron' docker network interface to be available

    // disable responding to api calls via IP to not leak domain info. this is carefully placed as the last item, so it buys
    // the UI some time to query the dashboard domain in the restore code path
    if (!constants.TEST) await timers.setTimeout(30000);
    await reverseProxy.writeDefaultConfig({ activated: true });

    debug('onActivated: finished');
}
2023-08-13 10:29:24 +05:30
2024-01-23 11:42:02 +01:00
// Counterpart of onActivated(): stops the services that only run on an
// activated cloudron. Called from uninitialize().
async function onDeactivated() {
    debug('onDeactivated: stopping post activation services');

    await cron.stopJobs();
    await dockerProxy.stop();
    await oidcServer.stop();
}
2024-04-27 11:10:24 +02:00
// Called whenever the dashboard subdomain/domain is (re)established — both at
// every initialize() and when the location is changed. Regenerates the nginx
// dashboard config and bounces the oidc server.
async function onDashboardLocationSet(subdomain, domain) {
    assert.strictEqual(typeof subdomain, 'string');
    assert.strictEqual(typeof domain, 'string');

    await safe(reverseProxy.writeDashboardConfig(subdomain, domain), { debug }); // ok to fail if no disk space

    // restart the oidc server; presumably so it picks up the new dashboard origin — TODO confirm
    await oidcServer.stop();
    await oidcServer.start();
}
2023-08-13 10:29:24 +05:30
// Called after the dashboard moved to a new location. Every installed app has
// the dashboard host injected via ExtraHosts, so all of them get reconfigured,
// and the turn service is rebuilt to refresh its realm. All steps best-effort.
async function onDashboardLocationChanged(auditSource) {
    assert.strictEqual(typeof auditSource, 'object');

    // mark all apps to be reconfigured, all have ExtraHosts injected
    const [, allApps] = await safe(apps.list());
    await safe(apps.configureApps(allApps, { scheduleNow: true }, auditSource), { debug });

    await safe(services.rebuildService('turn', auditSource), { debug }); // to update the realm variable
}
2023-08-21 18:18:03 +05:30
// Called after the mail server moved. Reconfigures apps that depend on the
// mail server location: email addon users and sendmail users that require a
// valid certificate. Best-effort via safe().
async function onMailServerLocationChanged(auditSource) {
    assert.strictEqual(typeof auditSource, 'object');

    const usesMailServer = (app) => !!app.manifest.addons?.email || app.manifest.addons?.sendmail?.requiresValidCertificate;

    // mark matching apps to be reconfigured right away
    const [, installedApps] = await safe(apps.list());
    await safe(apps.configureApps(installedApps.filter(usesMailServer), { scheduleNow: true }, auditSource), { debug });
}
2025-03-10 21:14:55 +01:00
// Called when the set of incoming mail domains changed. Only apps using the
// email addon are affected; they are reconfigured immediately (best-effort).
async function onMailServerIncomingDomainsChanged(auditSource) {
    assert.strictEqual(typeof auditSource, 'object');

    const [, installedApps] = await safe(apps.list());
    const emailApps = installedApps.filter((app) => !!app.manifest.addons?.email);
    await safe(apps.configureApps(emailApps, { scheduleNow: true }, auditSource), { debug });
}