Files
cloudron-box/src/apptaskmanager.js

92 lines
2.9 KiB
JavaScript
Raw Normal View History

2019-08-28 15:00:55 -07:00
'use strict';
exports = module.exports = {
start,
scheduleTask
2019-08-28 15:00:55 -07:00
};
const assert = require('assert'),
BoxError = require('./boxerror.js'),
2019-09-19 16:40:31 -07:00
debug = require('debug')('box:apptaskmanager'),
2019-08-28 15:00:55 -07:00
fs = require('fs'),
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
locks = require('./locks.js'),
2019-08-28 15:00:55 -07:00
path = require('path'),
paths = require('./paths.js'),
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
safe = require('safetydance'),
scheduler = require('./scheduler.js'),
2019-08-28 15:00:55 -07:00
tasks = require('./tasks.js');
2024-06-27 14:34:29 +02:00
const gActiveTasks = {}; // indexed by app id
const gPendingTasks = [];
let gStarted = false;
2019-08-28 15:00:55 -07:00
const TASK_CONCURRENCY = 3;
const DRAIN_TIMER_SECS = 1000;
2019-08-28 15:00:55 -07:00
let gDrainTimerId = null;
2019-08-28 15:00:55 -07:00
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
async function drain() {
debug(`drain: ${gPendingTasks.length} apptasks pending`);
2019-08-28 15:00:55 -07:00
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
for (let i = 0; i < gPendingTasks.length; i++) {
const space = Object.keys(gActiveTasks).length - TASK_CONCURRENCY;
if (space == 0) {
debug('At concurrency limit, cannot drain anymore');
break;
}
2019-08-28 15:00:55 -07:00
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
const { appId, taskId, options, onFinished } = gPendingTasks[i];
2019-08-28 15:00:55 -07:00
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
const [lockError] = await safe(locks.acquire(`${locks.TYPE_APP_PREFIX}${appId}`));
if (lockError) continue;
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
gPendingTasks.splice(i, 1);
gActiveTasks[appId] = {};
2019-08-28 15:00:55 -07:00
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
const logFile = path.join(paths.LOG_DIR, appId, 'apptask.log');
2020-08-19 18:23:44 +02:00
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
if (!fs.existsSync(path.dirname(logFile))) safe.fs.mkdirSync(path.dirname(logFile)); // ensure directory
scheduler.suspendAppJobs(appId);
2019-08-28 15:00:55 -07:00
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
tasks.startTask(taskId, Object.assign(options, { logFile }), async function (error, result) {
onFinished(error, result);
2019-08-28 15:00:55 -07:00
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
delete gActiveTasks[appId];
await locks.release(`${locks.TYPE_APP_PREFIX}${appId}`);
scheduler.resumeAppJobs(appId);
remove global lock Currently, the update/apptask/fullbackup/platformstart take a global lock and cannot run in parallel. This causes situations where when a user tries to trigger an apptask, it says "waiting for backup to finish..." etc The solution is to let them run in parallel. We need a lock at the app level as app operations running in parallel would be bad (tm). In addition, the update task needs a lock just for the update part. We also need multi-process locks. Running tasks as processes is core to our "kill" strategy. Various inter process locks were explored: * node's IPC mechanism with process.send(). But this only works for direct node.js children. taskworker is run via sudo and the IPC does not work. * File lock using O_EXCL. Basic ideas to create lock files. While file creation can be done atomically, it becomes complicated to clean up lock files when the tasks crash. We need a way to know what locks were held by the crashing task. flock and friends are not built-into node.js * sqlite/redis were options but introduce additional deps * Settled on MySQL based locking. Initial plan was to have row locks or table locks. Each row is a kind of lock. While implementing, it was found that we need many types of locks (and not just update lock and app locks). For example, we need locks for each task type, so that only one task type is active at a time. * Instead of rows, we can just lock table and have a json blob in it. This hit a road block that LOCK TABLE is per session and our db layer cannot handle this easily! i.e when issing two db.query() it might use two different connections from the pool. We have to expose the connection, release connection etc. * Next idea was atomic blob update of the blob checking if old blob was same. This approach, was finally refined into a version field. Phew!
2024-12-07 14:35:45 +01:00
});
}
2019-08-28 15:00:55 -07:00
gDrainTimerId = null;
if (gPendingTasks.length) gDrainTimerId = setTimeout(drain, DRAIN_TIMER_SECS); // check for released locks
}
async function start() {
assert.strictEqual(gDrainTimerId, null);
assert.strictEqual(gStarted, false);
debug('started');
gStarted = true;
if (gPendingTasks.length) gDrainTimerId = setTimeout(drain, DRAIN_TIMER_SECS);
}
function scheduleTask(appId, taskId, options, onFinished) {
assert.strictEqual(typeof appId, 'string');
assert.strictEqual(typeof taskId, 'string');
assert.strictEqual(typeof options, 'object');
assert.strictEqual(typeof onFinished, 'function');
if (appId in gActiveTasks) {
onFinished(new BoxError(BoxError.CONFLICT, `Task for ${appId} is already active`));
return;
}
2024-12-12 19:07:03 +01:00
// percent 1 is relies on the tasks "active" flag to indicate task is queued but not started yet
tasks.update(taskId, { percent: 1, message: gStarted ? 'Queued' : 'Waiting for platform to initialize' });
gPendingTasks.push({ appId, taskId, options, onFinished });
if (gStarted && !gDrainTimerId) gDrainTimerId = setTimeout(drain, DRAIN_TIMER_SECS);
2019-08-28 15:00:55 -07:00
}