diff --git a/src/shell.js b/src/shell.js index cc7cbbc45..64658bbb4 100644 --- a/src/shell.js +++ b/src/shell.js @@ -4,7 +4,6 @@ const assert = require('assert'), BoxError = require('./boxerror.js'), child_process = require('child_process'), debug = require('debug')('box:shell'), - once = require('./once.js'), path = require('path'), _ = require('./underscore.js'); @@ -17,7 +16,6 @@ function shell(tag) { bash: bash.bind(null, tag), spawn: spawn.bind(null, tag), sudo: sudo.bind(null, tag), - sudoCallback: sudoCallback.bind(null, tag) }; } @@ -134,73 +132,3 @@ async function sudo(tag, args, options) { return await spawn(tag, SUDO, spawnArgs, options); } - -function sudoCallback(tag, args, options, callback) { - assert.strictEqual(typeof tag, 'string'); - assert(Array.isArray(args)); - assert.strictEqual(typeof options, 'object'); - assert.strictEqual(typeof callback, 'function'); - - const sudoArgs = [ '-S' ]; // -S makes sudo read stdin for password - if (options.preserveEnv) sudoArgs.push('-E'); // -E preserves environment - - callback = once(callback); // exit may or may not be called after an 'error' - - const logFunc = options.logger || debug; - - if (options.onMessage) { // enable ipc - sudoArgs.push('--close-from=4'); // keep the ipc open. requires closefrom_override in sudoers file - options.stdio = ['pipe', 'pipe', 'pipe', 'ipc']; - } - - const spawnArgs = sudoArgs.concat(args); - - debug(`${tag} ${SUDO} ${spawnArgs.join(' ').replace(/\n/g, '\\n')}`); - const cp = child_process.spawn(SUDO, spawnArgs, options); - let stdoutResult = ''; - - cp.stdout.on('data', (data) => { - if (options.captureStdout) stdoutResult += data.toString('utf8'); - if (!options.quiet) logFunc(data.toString('utf8')); - }); - cp.stderr.on('data', (data) => logFunc(data.toString('utf8'))); - - cp.on('exit', function (code, signal) { - if (code === 0) return callback(null, options.captureStdout ? stdoutResult : null); - const e = new BoxError(BoxError.SHELL_ERROR, `${tag} exited with code ${code} signal ${signal}`); - e.code = code; - e.signal = signal; - - if (cp.terminated) { - debug(`${tag}: ${SUDO} ${spawnArgs.join(' ').replace(/\n/g, '\\n')} terminated`); // was killed by us - } else { - debug(`${tag}: ${SUDO} ${spawnArgs.join(' ').replace(/\n/g, '\\n')} errored`, e); - } - - callback(e); - }); - - cp.on('error', function (error) { - debug(`${tag}: ${SUDO} ${spawnArgs.join(' ').replace(/\n/g, '\\n')} errored`, error); - const e = new BoxError(BoxError.SHELL_ERROR, `${tag} errored with code ${error.code} message ${error.message}`); - e.code = error.code; - e.signal = error.signal; - callback(e); - }); - - // sudo forks and execs the program. sudo also hangs around as the parent of the program waiting on the program and also forwarding signals. - // sudo does not forward signals when the originator comes from the same process group. recently, there has been a change where it will - // forward signals as long as sudo or the command is not the group leader (https://www.sudo.ws/repos/sudo/rev/d1bf60eac57f) - // for us, this means that calling kill from this node process doesn't work since it's in the same group (and ubuntu 22 doesn't have the above fix). - // the workaround is to invoke a kill from a different process group and this is done by starting detached . negative pid mean to group - // another idea is: use "ps --pid cp.pid -o pid=" to get the pid of the command and then send it signal directly - cp.terminate = function () { - cp.terminated = true; // hint for better debug message in 'exit' - child_process.spawn('kill', ['-SIGKILL', -cp.pid], { detached: true }, (error) => { if (error) debug(`${tag} could not terminate`, error); }); - }; - - cp.stdin.end(); - if (options.onMessage) cp.on('message', options.onMessage); - - return cp; -} diff --git a/src/tasks.js b/src/tasks.js index 798b33b0d..96a41db3e 100644 --- a/src/tasks.js +++ b/src/tasks.js @@ -165,61 +165,61 @@ async function startTask(id, options) { let killTimerId = null, timedOut = false; - const sudoOptions = { preserveEnv: true }; - - const p = new Promise((resolve, reject) => { - gTasks[id] = shell.sudoCallback([ START_TASK_CMD, id, logFile, options.nice || 0, options.memoryLimit || 400, options.oomScoreAdjust || 0 ], sudoOptions, async function (sudoError) { - const code = sudoError ? sudoError.code : 0; - - debug(`startTask: ${id} completed with code ${code}. ignored: ${!gTasks[id]}`); - - if (options.timeout) clearTimeout(killTimerId); - - if (!gTasks[id]) return; // when box code is shutting dow. don't update the task status - - const [getError, task] = await safe(get(id)); - - let taskError = null; - if (!getError && !task.completed) { // taskworker crashed or was killed by us - if (code === 0) { - taskError = { - message: `Task ${id} ${timedOut ? 'timed out' : 'stopped'}` , - code: timedOut ? exports.ETIMEOUT : exports.ESTOPPED - }; - } else { // task crashed. for code, maybe we can check systemctl show box-task-1707 -p ExecMainStatus - taskError = { - message: code === 2 ? `Task ${id} crashed as it ran out of memory` : `Task ${id} crashed with code ${code}`, - code: exports.ECRASHED - }; - } - - // note that despite the update() here, we should handle the case where the box code was restarted and never got taskworker exit - await safe(setCompleted(id, { error: taskError })); - } else if (!getError && task.error) { - taskError = task.error; - } else if (!task) { // db got cleared in tests - taskError = new BoxError(BoxError.NOT_FOUND, `No such task ${id}`); - } - - delete gTasks[id]; - - debug(`startTask: ${id} done. error: %o`, taskError); - - if (taskError) reject(taskError); else resolve(task.result); - }); - - if (options.timeout) { - killTimerId = setTimeout(async function () { - debug(`startTask: task ${id} took too long. killing`); - timedOut = true; - const [error] = await safe(stopTask(id)); - if (error) debug(`startTask: error stopping task: ${error.message}`); - }, options.timeout); - } - }); - await update(id, { pending: false }); - return p; + + const ac = new AbortController(); + gTasks[id] = ac; + + if (options.timeout) { + killTimerId = setTimeout(async function () { + debug(`startTask: task ${id} took too long. killing`); + timedOut = true; + const [error] = await safe(stopTask(id)); + if (error) debug(`startTask: error stopping task: ${error.message}`); + }, options.timeout); + } + + const sudoOptions = { preserveEnv: true, signal: ac.signal }; + + const [sudoError] = await safe(shell.sudo([ START_TASK_CMD, id, logFile, options.nice || 0, options.memoryLimit || 400, options.oomScoreAdjust || 0 ], sudoOptions)); + const code = sudoError ? sudoError.code : 0; + + debug(`startTask: ${id} completed with code ${code}. ignored: ${!gTasks[id]}`); + + if (options.timeout) clearTimeout(killTimerId); + + if (!gTasks[id]) return; // when box code is shutting down, don't update the task status as "crashed". see stopAllTasks() + + const [getError, task] = await safe(get(id)); + + let taskError = null; + if (!getError && !task.completed) { // taskworker crashed or was killed by us + if (code === 0) { + taskError = { + message: `Task ${id} ${timedOut ? 'timed out' : 'stopped'}` , + code: timedOut ? exports.ETIMEOUT : exports.ESTOPPED + }; + } else { // task crashed. for code, maybe we can check systemctl show box-task-1707 -p ExecMainStatus + taskError = { + message: code === 2 ? `Task ${id} crashed as it ran out of memory` : `Task ${id} crashed with code ${code}`, + code: exports.ECRASHED + }; + } + + // note that despite the update() here, we should handle the case where the box code was restarted and never got taskworker exit + await safe(setCompleted(id, { error: taskError })); + } else if (!getError && task.error) { + taskError = task.error; + } else if (!task) { // db got cleared in tests + taskError = new BoxError(BoxError.NOT_FOUND, `No such task ${id}`); + } + + delete gTasks[id]; + + debug(`startTask: ${id} done. error: %o`, taskError); + + if (taskError) throw taskError; + return task.result; } async function stopTask(id) { @@ -234,9 +234,9 @@ async function stopTask(id) { async function stopAllTasks() { debug('stopAllTasks: ignoring exit status of starttask'); - const cps = Object.values(gTasks); + const acs = Object.values(gTasks); gTasks = {}; // this signals startTask() to not set completion status as "crashed" - cps.forEach(cp => { debug(`stopAllTasks: terminating process group ${cp.pid}`); cp.terminate(); }); // cleanup all the sudos and systemd-run + acs.forEach(ac => ac.abort()); // cleanup all the sudos and systemd-run const [error] = await safe(shell.sudo([ STOP_TASK_CMD, 'all' ], { cwd: paths.baseDir() })); if (error) debug(`stopAllTasks: error stopping stasks: ${error.message}`); }