Add retry to platform.start instead
This is because platform.start holds a lock and so cannot be retried
See also 0c0aeeae4c, which tried to
add the retry for all startup tasks
This commit is contained in:
+22
-10
@@ -10,7 +10,9 @@ exports = module.exports = {
|
||||
|
||||
const apps = require('./apps.js'),
|
||||
assert = require('assert'),
|
||||
BoxError = require('./boxerror.js'),
|
||||
debug = require('debug')('box:platform'),
|
||||
delay = require('delay'),
|
||||
fs = require('fs'),
|
||||
infra = require('./infra_version.js'),
|
||||
locker = require('./locker.js'),
|
||||
@@ -37,9 +39,7 @@ async function start(options) {
|
||||
// short-circuit for the restart case
|
||||
if (_.isEqual(infra, existingInfra)) {
|
||||
debug('platform is uptodate at version %s', infra.version);
|
||||
|
||||
onPlatformReady(false /* !infraChanged */);
|
||||
|
||||
await onPlatformReady(false /* !infraChanged */);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -48,15 +48,27 @@ async function start(options) {
|
||||
const error = locker.lock(locker.OP_PLATFORM_START);
|
||||
if (error) throw error;
|
||||
|
||||
if (existingInfra.version !== infra.version) await removeAllContainers();
|
||||
if (existingInfra.version === 'none') await volumes.mountAll(); // when restoring, mount all volumes
|
||||
await markApps(existingInfra, options); // mark app state before we start addons. this gives the db import logic a chance to mark an app as errored
|
||||
await services.startServices(existingInfra);
|
||||
await fs.promises.writeFile(paths.INFRA_VERSION_FILE, JSON.stringify(infra, null, 4));
|
||||
for (let attempt = 0; attempt < 5; attempt++) {
|
||||
try {
|
||||
if (existingInfra.version !== infra.version) await removeAllContainers();
|
||||
if (existingInfra.version === 'none') await volumes.mountAll(); // when restoring, mount all volumes
|
||||
await markApps(existingInfra, options); // mark app state before we start addons. this gives the db import logic a chance to mark an app as errored
|
||||
await services.startServices(existingInfra);
|
||||
await fs.promises.writeFile(paths.INFRA_VERSION_FILE, JSON.stringify(infra, null, 4));
|
||||
break;
|
||||
} catch (error) {
|
||||
// for some reason, mysql arbitrary restarts making startup tasks fail. this makes the box update stuck
|
||||
// LOST is when existing connection breaks. REFUSED is when new connection cannot connect at all
|
||||
const retry = error.reason === BoxError.DATABASE_ERROR && (error.code === 'PROTOCOL_CONNECTION_LOST' || error.code === 'ECONNREFUSED');
|
||||
debug(`Failed to start services. retry=${retry} (attempt ${attempt}): ${error.message}`);
|
||||
if (!retry) break;
|
||||
await delay(10000);
|
||||
}
|
||||
}
|
||||
|
||||
locker.unlock(locker.OP_PLATFORM_START);
|
||||
|
||||
onPlatformReady(true /* infraChanged */); // background
|
||||
await onPlatformReady(true /* infraChanged */);
|
||||
}
|
||||
|
||||
async function stopAllTasks() {
|
||||
@@ -67,7 +79,7 @@ async function onPlatformReady(infraChanged) {
|
||||
debug(`onPlatformReady: platform is ready. infra changed: ${infraChanged}`);
|
||||
exports._isReady = true;
|
||||
|
||||
if (infraChanged) await pruneInfraImages();
|
||||
if (infraChanged) await safe(pruneInfraImages(), { debug }); // ignore error
|
||||
|
||||
await apps.schedulePendingTasks();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user