Fix repair

If a task fails, we can either:
* allow other task ops to be called - we cannot do this because the ops are fine-grained. for example,
  a restore failure removes many things and calling set-memory or set-location in that state won't
  make sense.

* provide a generic repair route - this allows one to override args and call the failed task
  again. this is what we have now but has the issue that this repair function has to know about all
  the other op functions. for example, for argument validation. we can do some complicated refactoring
  to make it work if we want.

* just a generic total re-configure - this does not work because clone/restore/backup/datadir/uninstall/update
  failure leaves the app in a state which re-configure cannot do anything about.

* allow the failed op to be called again - this seems the easiest. we just allow the route to be called again
  in the error state.

* if we hit a state where even providing extra args cannot get you out of this "error" state, we have to provide
  some repair route. for example, maybe the container disappeared because of some docker error. user clicks 'repair' to
  recreate the container. this route does not have to take any args.

The final solution is:
* a failed task can be called again via the route. so we can resubmit any args and we get validation
* repair route just re-configures and can be called in any state to just rebuild container. re-configure is also
  doing only local changes (docker, nginx)
* install/clone failures are fixed using repair route. updated manifest can be passed in.
* UI shows backup selector for restore failures
* UI shows domain selector for change location failure
This commit is contained in:
Girish Ramakrishnan
2019-11-23 18:35:51 -08:00
parent 37d7be93b5
commit 6a64f24e98
2 changed files with 25 additions and 35 deletions

View File

@@ -1348,9 +1348,10 @@ function getLogs(appId, options, callback) {
});
}
// does a re-configure when called from most states. for install/clone errors, it re-installs with an optional manifest
function repair(appId, data, auditSource, callback) {
assert.strictEqual(typeof appId, 'string');
assert.strictEqual(typeof data, 'object');
assert.strictEqual(typeof data, 'object'); // { manifest }
assert.strictEqual(typeof auditSource, 'object');
assert.strictEqual(typeof callback, 'function');
@@ -1359,35 +1360,33 @@ function repair(appId, data, auditSource, callback) {
get(appId, function (error, app) {
if (error) return callback(error);
const appError = app.error || {}; // repair can always be called
const newState = appError.installationState ? appError.installationState : exports.ISTATE_PENDING_CONFIGURE;
const errorState = (app.error && app.error.installationState) || exports.ISTATE_PENDING_CONFIGURE;
debug(`Repairing app with error: ${JSON.stringify(error)} and state: ${newState}`);
const task = {
args: {},
values: {},
requiredState: null
};
let values = _.pick(data, 'location', 'domain', 'alternateDomains');
if (errorState === exports.ISTATE_PENDING_INSTALL || errorState === exports.ISTATE_PENDING_CLONE) {
if (data.manifest) {
error = manifestFormat.parse(data.manifest);
if (error) return callback(new BoxError(BoxError.BAD_FIELD, `manifest error: ${error.message}`));
const locations = (values.location ? [ { subdomain: values.location, domain: values.domain } ] : []).concat(values.alternateDomains || []);
validateLocations(locations, function (error, domainObjectMap) {
error = checkManifestConstraints(data.manifest);
if (error) return callback(error);
task.values.manifest = data.manifest;
task.args.oldManifest = app.manifest;
}
}
addTask(appId, errorState, task, function (error, result) {
if (error) return callback(error);
tasks.get(appError.taskId || '', function (error, task) {
let args = !error ? task.args[1] : {}; // pick args for the failed task. the first argument is the app id
eventlog.add(eventlog.ACTION_APP_REPAIR, auditSource, { taskId: result.taskId, app });
if ('backupId' in data) {
args.restoreConfig = data.backupId ? { backupId: data.backupId, backupFormat: data.backupFormat, oldManifest: app.manifest } : null; // when null, apptask simply reinstalls
}
args.overwriteDns = 'overwriteDns' in data ? data.overwriteDns : false;
// create a new task instead of updating the old one, since it helps tracking
addTask(appId, newState, { args, values, requiredState: null }, function (error, result) {
if (error && error.reason === BoxError.ALREADY_EXISTS) error = getDuplicateErrorDetails(error.message, locations, domainObjectMap, { /* portBindings */});
if (error) return callback(error);
eventlog.add(eventlog.ACTION_APP_REPAIR, auditSource, { taskId: result.taskId, app, newState });
callback(null, { taskId: result.taskId });
});
});
callback(null, { taskId: result.taskId });
});
});
}