2025-06-16 14:18:49 +02:00
|
|
|
#!/usr/bin/env -S node --unhandled-rejections=strict
|
2020-08-06 14:36:25 -07:00
|
|
|
|
2018-12-09 03:20:00 -08:00
|
|
|
'use strict';
|
|
|
|
|
|
2021-05-18 13:28:48 -07:00
|
|
|
const apptask = require('./apptask.js'),
|
2021-07-14 11:07:19 -07:00
|
|
|
backupCleaner = require('./backupcleaner.js'),
|
2025-08-15 16:09:58 +05:30
|
|
|
backupIntegrity = require('./backupintegrity.js'),
|
2021-07-14 11:07:19 -07:00
|
|
|
backuptask = require('./backuptask.js'),
|
2025-07-18 19:33:34 +02:00
|
|
|
BoxError = require('./boxerror.js'),
|
2023-08-13 10:06:01 +05:30
|
|
|
dashboard = require('./dashboard.js'),
|
2018-12-09 03:20:00 -08:00
|
|
|
database = require('./database.js'),
|
2021-08-13 17:22:28 -07:00
|
|
|
dns = require('./dns.js'),
|
2023-07-08 19:48:12 +05:30
|
|
|
dyndns = require('./dyndns.js'),
|
2019-10-25 15:58:11 -07:00
|
|
|
externalLdap = require('./externalldap.js'),
|
2025-08-14 11:17:38 +05:30
|
|
|
fs = require('node:fs'),
|
remove global lock
Currently, the update/apptask/fullbackup/platformstart take a
global lock and cannot run in parallel. This causes situations
where when a user tries to trigger an apptask, it says "waiting for
backup to finish..." etc
The solution is to let them run in parallel. We need a lock at the
app level as app operations running in parallel would be bad (tm).
In addition, the update task needs a lock just for the update part.
We also need multi-process locks. Running tasks as processes is core
to our "kill" strategy.
Various inter process locks were explored:
* node's IPC mechanism with process.send(). But this only works for direct node.js
children. taskworker is run via sudo and the IPC does not work.
* File lock using O_EXCL. Basic ideas to create lock files. While file creation
can be done atomically, it becomes complicated to clean up lock files when
the tasks crash. We need a way to know what locks were held by the crashing task.
flock and friends are not built-into node.js
* sqlite/redis were options but introduce additional deps
* Settled on MySQL based locking. Initial plan was to have row locks
or table locks. Each row is a kind of lock. While implementing, it was found that
we need many types of locks (and not just update lock and app locks). For example,
we need locks for each task type, so that only one task type is active at a time.
* Instead of rows, we can just lock table and have a json blob in it. This hit a road
block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e
when issing two db.query() it might use two different connections from the pool. We have to
expose the connection, release connection etc.
* Next idea was atomic blob update of the blob checking if old blob was same. This approach,
was finally refined into a version field.
Phew!
2024-12-07 14:35:45 +01:00
|
|
|
locks = require('./locks.js'),
|
2023-08-04 20:54:16 +05:30
|
|
|
mailServer = require('./mailserver.js'),
|
2025-08-14 11:17:38 +05:30
|
|
|
net = require('node:net'),
|
2018-12-10 20:20:53 -08:00
|
|
|
reverseProxy = require('./reverseproxy.js'),
|
2021-07-12 23:35:30 -07:00
|
|
|
safe = require('safetydance'),
|
2018-12-09 03:20:00 -08:00
|
|
|
tasks = require('./tasks.js'),
|
2025-07-17 09:53:29 +02:00
|
|
|
timers = require('timers/promises'),
|
2022-04-15 17:40:46 -05:00
|
|
|
updater = require('./updater.js');
|
2018-12-09 03:20:00 -08:00
|
|
|
|
|
|
|
|
// Task runner functions, indexed by task type.
// Each runner is invoked as runner(...task.args, progressCallback) and may
// be async; a thrown error marks the task as crashed (see toTaskError()).
const TASKS = {
    app: apptask.run,
    appBackup: backuptask.appBackup,
    backup: backuptask.fullBackup,
    boxUpdate: updater.updateBox,
    checkCerts: reverseProxy.checkCerts,
    prepareDashboardLocation: dashboard.prepareLocation,
    cleanBackups: backupCleaner.run,
    syncExternalLdap: externalLdap.sync,
    changeMailLocation: mailServer.changeLocation,
    syncDnsRecords: dns.syncDnsRecords,
    syncDyndns: dyndns.sync,
    checkBackupIntegrity: backupIntegrity.check,

    // the entries below exist only for the test suite
    identity: async (arg, progressCallback) => {
        progressCallback({ percent: 20 });
        return arg;
    },
    error: async (arg, progressCallback) => {
        progressCallback({ percent: 20 });
        throw new Error(`Failed for arg: ${arg}`);
    },
    crash: (arg) => {
        throw new Error(`Crashing for arg: ${arg}`); // the test looks for this debug string in the log file
    },
    sleep: async (arg) => {
        await timers.setTimeout(parseInt(arg, 10));
    }
};
|
|
|
|
|
|
2020-08-04 22:16:38 -07:00
|
|
|
// Usage: taskworker <taskid> <logfile>
if (process.argv.length !== 4) {
    console.error('Pass the taskid and logfile as argument');
    process.exit(1);
}

const [ , , taskId, logFile ] = process.argv;

let logFd = null; // file descriptor of logFile; opened in setupLogging()
|
|
|
|
|
|
|
|
|
|
// Opens the task log file and redirects stdout/stderr into it.
async function setupLogging() {
    logFd = fs.openSync(logFile, 'a');

    // A write stream was used previously, but streams buffer internally and
    // there is no way to flush that buffer when things crash.
    process.stdout.write = process.stderr.write = (...chunks) => {
        // fs.write requires a callback; reuse the caller's if one was passed
        const done = typeof chunks[chunks.length - 1] === 'function' ? chunks.pop() : () => {};
        fs.write(logFd, ...chunks, done);
    };

    process.stdout.logFile = logFile; // used by update task
}
|
2020-08-04 22:16:38 -07:00
|
|
|
|
network: fix premature connection closures with node 20 and above
the happy eyeballs implementation in node is buggy. ipv4 and ipv6 connections
are made in parallel and whichever responds first is chosen. when there is no
ipv6 (immediately errors with ENETUNREACH/EHOSTUNREACH) and when ipv4 is > 250ms,
the code erroneously times out.
see also https://github.com/nodejs/node/issues/54359
reproduction for those servers:
const options = {
hostname: 'www.cloudron.io', port: 80, path: '/', method: 'HEAD',
// family: 4, // uncomment to make it work
};
const req = require('http').request(options, (res) => {
console.log('statusCode:', res.statusCode);
res.on('data', () => {}); // drain
});
req.on('socket', (socket) => console.log('Socket assigned to request', socket););
req.on('error', (e) => console.error(e));
req.end();
2024-10-31 09:38:40 +01:00
|
|
|
// happy eyeballs workaround. see box.js for detailed note
// (node races ipv4/ipv6 connects and can erroneously time out slow ipv4 —
// https://github.com/nodejs/node/issues/54359)
async function setupNetworking() {
    const AUTO_SELECT_FAMILY_ATTEMPT_TIMEOUT_MS = 2500;
    net.setDefaultAutoSelectFamilyAttemptTimeout(AUTO_SELECT_FAMILY_ATTEMPT_TIMEOUT_MS);
}
|
|
|
|
|
|
2021-08-30 22:01:34 -07:00
|
|
|
// this is also used as the 'uncaughtException' handler which can only have synchronous functions
// taskworker.sh forwards the exit code of the actual worker. It's either a raw signal number OR the exit code. So, choose exit codes > 31
// 50 - internal error , 70 - SIGTERM exit
function exitSync(status) {
    // Use writeSync here: the async fs.write() used previously may not have
    // been flushed (or even issued) by the time process.exit() below tears
    // the process down, losing the error stack from the log file.
    if (status.error) fs.writeSync(logFd, status.error.stack + '\n');
    fs.writeSync(logFd, `Exiting with code ${status.code}\n`);
    fs.fsyncSync(logFd); // flush kernel buffers so the log survives the exit
    fs.closeSync(logFd);
    process.exit(status.code);
}
|
|
|
|
|
|
2025-07-18 19:33:34 +02:00
|
|
|
// Converts an error thrown by a task runner into the plain object stored
// with the task record. BoxError knows how to serialize itself; anything
// else is an unexpected crash and keeps its stack for debugging.
function toTaskError(runError) {
    if (runError instanceof BoxError) return runError.toPlainObject();

    const crashError = {
        message: `Task crashed. ${runError.message}`,
        stack: runError.stack,
        code: tasks.ECRASHED
    };

    return crashError;
}
|
|
|
|
|
|
2018-12-09 03:20:00 -08:00
|
|
|
// Main process starts here
|
2020-07-31 12:59:15 -07:00
|
|
|
const startTime = new Date(); // wall-clock start; main() logs the task duration relative to this
|
2018-12-09 03:20:00 -08:00
|
|
|
|
2024-10-31 09:46:36 +01:00
|
|
|
// Entry point: initializes logging/db, fetches the task record, runs the
// matching TASKS handler and records the result, then exits via exitSync().
async function main() {
    try {
        await setupLogging();
        await setupNetworking();
        await database.initialize();
        locks.setTaskId(taskId);
    } catch (initError) {
        console.error(initError);
        return process.exit(50); // internal error (see exit code note above exitSync)
    }

    const debug = require('debug')('box:taskworker'); // require this here so that logging handler is already setup

    process.on('SIGTERM', () => {
        debug('Terminated');
        exitSync({ code: 70 });
    });

    // ensure we log task crashes with the task logs. neither console.log nor debug are sync for some reason
    process.on('uncaughtException', (error) => exitSync({ error, code: 1 }));

    debug(`Starting task ${taskId}. Logs are at ${logFile}`);

    const [getError, task] = await safe(tasks.get(taskId));
    if (getError) return exitSync({ error: getError, code: 50 });
    if (!task) return exitSync({ error: new Error(`Task ${taskId} not found`), code: 50 });

    // reports intermediate progress; update failures are logged but not fatal
    async function progressCallback(progress) {
        await safe(tasks.update(taskId, progress), { debug });
    }

    // task types may carry an underscore suffix; strip it to find the runner
    const taskName = task.type.replace(/_.*/,'');
    const taskFunction = TASKS[taskName];
    // guard against unknown task types instead of crashing with a TypeError below
    if (!taskFunction) return exitSync({ error: new Error(`Unknown task type ${task.type}`), code: 50 });

    debug(`Running task of type ${taskName}`);
    const [runError, result] = await safe(taskFunction.apply(null, task.args.concat(progressCallback)));

    const progress = {
        result: result || null, // NOTE(review): falsy results (0, '', false) collapse to null here — confirm intended
        error: runError ? toTaskError(runError) : null,
        percent: 100
    };

    await safe(tasks.setCompleted(taskId, progress), { debug });

    debug(`Task took ${(new Date() - startTime)/1000} seconds`);

    exitSync({ error: runError, code: (!runError || runError instanceof BoxError) ? 0 : 50 }); // handled error vs run time crash
}
|
|
|
|
|
|
|
|
|
|
main(); // kick off the worker; every path through main() ends in exitSync()/process.exit()
|