add retry to platform.start instead

This is because it holds a lock and cannot be retried.

See also 0c0aeeae4c, which tried to
add the retry for all startup tasks.
This commit is contained in:
Girish Ramakrishnan
2021-11-02 22:30:38 -07:00
parent 4ffe03553a
commit af2a8ba07f
2 changed files with 24 additions and 20 deletions
+22 -10
View File
@@ -10,7 +10,9 @@ exports = module.exports = {
const apps = require('./apps.js'),
assert = require('assert'),
BoxError = require('./boxerror.js'),
debug = require('debug')('box:platform'),
delay = require('delay'),
fs = require('fs'),
infra = require('./infra_version.js'),
locker = require('./locker.js'),
@@ -37,9 +39,7 @@ async function start(options) {
// short-circuit for the restart case
if (_.isEqual(infra, existingInfra)) {
debug('platform is uptodate at version %s', infra.version);
onPlatformReady(false /* !infraChanged */);
await onPlatformReady(false /* !infraChanged */);
return;
}
@@ -48,15 +48,27 @@ async function start(options) {
const error = locker.lock(locker.OP_PLATFORM_START);
if (error) throw error;
if (existingInfra.version !== infra.version) await removeAllContainers();
if (existingInfra.version === 'none') await volumes.mountAll(); // when restoring, mount all volumes
await markApps(existingInfra, options); // mark app state before we start addons. this gives the db import logic a chance to mark an app as errored
await services.startServices(existingInfra);
await fs.promises.writeFile(paths.INFRA_VERSION_FILE, JSON.stringify(infra, null, 4));
for (let attempt = 0; attempt < 5; attempt++) {
try {
if (existingInfra.version !== infra.version) await removeAllContainers();
if (existingInfra.version === 'none') await volumes.mountAll(); // when restoring, mount all volumes
await markApps(existingInfra, options); // mark app state before we start addons. this gives the db import logic a chance to mark an app as errored
await services.startServices(existingInfra);
await fs.promises.writeFile(paths.INFRA_VERSION_FILE, JSON.stringify(infra, null, 4));
break;
} catch (error) {
// for some reason, mysql arbitrarily restarts, making startup tasks fail. this leaves the box update stuck
// LOST is when an existing connection breaks. REFUSED is when a new connection cannot connect at all
const retry = error.reason === BoxError.DATABASE_ERROR && (error.code === 'PROTOCOL_CONNECTION_LOST' || error.code === 'ECONNREFUSED');
debug(`Failed to start services. retry=${retry} (attempt ${attempt}): ${error.message}`);
if (!retry) break;
await delay(10000);
}
}
locker.unlock(locker.OP_PLATFORM_START);
onPlatformReady(true /* infraChanged */); // background
await onPlatformReady(true /* infraChanged */);
}
async function stopAllTasks() {
@@ -67,7 +79,7 @@ async function onPlatformReady(infraChanged) {
debug(`onPlatformReady: platform is ready. infra changed: ${infraChanged}`);
exports._isReady = true;
if (infraChanged) await pruneInfraImages();
if (infraChanged) await safe(pruneInfraImages(), { debug }); // ignore error
await apps.schedulePendingTasks();
}