'use strict';

const BoxError = require('./boxerror.js');

exports = module.exports = {
    run,

    _applyBackupRetention: applyBackupRetention
};

const apps = require('./apps.js'),
    archives = require('./archives.js'),
    assert = require('assert'),
    backupFormat = require('./backupformat.js'),
    backups = require('./backups.js'),
    constants = require('./constants.js'),
    debug = require('debug')('box:backupcleaner'),
    moment = require('moment'),
    path = require('path'),
    paths = require('./paths.js'),
    safe = require('safetydance'),
    storage = require('./storage.js');

// Annotates every backup in allBackups (in place) with a keepReason or a discardReason based on
// the retention policy. Backups that end up with neither are logged as 'unprocessed'; the callers
// in this file remove every backup that has no keepReason.
//   allBackups          - array of backup objects (state, id, creationTime, preserveSecs, remotePath are read)
//   retention           - object with optional keepWithinSecs, keepDaily, keepWeekly, keepMonthly, keepYearly, keepLatest
//   referencedBackupIds - ids of backups that must be kept because something else refers to them
// NOTE(review): the period rules and keepLatest pick the first matching entry in array order, so
// this presumably expects allBackups sorted newest first - confirm against backups.getByTypePaged
function applyBackupRetention(allBackups, retention, referencedBackupIds) {
    assert(Array.isArray(allBackups));
    assert.strictEqual(typeof retention, 'object');
    assert(Array.isArray(referencedBackupIds));

    const now = new Date();
    const referenced = new Set(referencedBackupIds); // one-time lookup table instead of a linear scan per backup

    for (const backup of allBackups) {
        if (backup.state === backups.BACKUP_STATE_ERROR) {
            backup.discardReason = 'error';
        } else if (backup.state === backups.BACKUP_STATE_CREATING) {
            // in-progress backups get 48 hours before they are considered stuck
            if ((now - backup.creationTime) < 48*60*60*1000) backup.keepReason = 'creating';
            else backup.discardReason = 'creating-too-long';
        } else if (referenced.has(backup.id)) { // could also be in archives
            backup.keepReason = 'referenced';
        } else if ((backup.preserveSecs === -1) || ((now - backup.creationTime) < (backup.preserveSecs * 1000))) {
            backup.keepReason = 'preserveSecs';
        } else if ((now - backup.creationTime < retention.keepWithinSecs * 1000) || retention.keepWithinSecs < 0) {
            backup.keepReason = 'keepWithinSecs';
        }
    }

    // moment format string that identifies the period (day, week, month, year) a backup falls into
    const KEEP_FORMATS = {
        keepDaily: 'Y-M-D',
        keepWeekly: 'Y-W',
        keepMonthly: 'Y-M',
        keepYearly: 'Y'
    };

    for (const format of [ 'keepDaily', 'keepWeekly', 'keepMonthly', 'keepYearly' ]) {
        if (!(format in retention)) continue;

        const n = retention[format]; // we want to keep "n" backups of format
        if (!n) continue; // disabled rule

        let lastPeriod = null, keptSoFar = 0;
        for (const backup of allBackups) {
            if (backup.discardReason) continue; // already discarded for some reason
            if (backup.keepReason && backup.keepReason !== 'referenced') continue; // kept for some other reason
            const period = moment(backup.creationTime).format(KEEP_FORMATS[format]);
            if (period === lastPeriod) continue; // already kept for this period

            lastPeriod = period;
            // a plain 'referenced' backup also counts towards this rule and records both reasons
            backup.keepReason = backup.keepReason ? `${backup.keepReason}+${format}` : format;
            if (++keptSoFar === n) break;
        }
    }

    if (retention.keepLatest) {
        // first backup in array order that is in the normal state
        const latestNormalBackup = allBackups.find(b => b.state === backups.BACKUP_STATE_NORMAL);
        if (latestNormalBackup && !latestNormalBackup.keepReason) latestNormalBackup.keepReason = 'latest';
    }

    for (const backup of allBackups) {
        debug(`applyBackupRetention: ${backup.remotePath} keep/discard: ${backup.keepReason || backup.discardReason || 'unprocessed'}`);
    }
}

// Removes a backup from the storage backend and then from the database. Never rejects because of
// storage or database errors: they are only logged. If the storage removal fails, the database
// entry is left in place.
async function removeBackup(backupConfig, backup, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof backup, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const backupFilePath = backupFormat.api(backup.format).getBackupFilePath(backupConfig, backup.remotePath);

    let removeError;
    if (backup.format ==='tgz') {
        progressCallback({ message: `${backup.remotePath}: Removing ${backupFilePath}`});
        [removeError] = await safe(storage.api(backupConfig.provider).remove(backupConfig, backupFilePath));
    } else {
        progressCallback({ message: `${backup.remotePath}: Removing directory ${backupFilePath}`});
        [removeError] = await safe(storage.api(backupConfig.provider).removeDir(backupConfig, backupFilePath, progressCallback));
    }

    if (removeError) {
        debug(`removeBackup: error removing backup ${removeError.message}`);
        return;
    }

    // prune empty directory if possible
    const [pruneError] = await safe(storage.api(backupConfig.provider).remove(backupConfig, path.dirname(backupFilePath)));
    if (pruneError) debug(`removeBackup: unable to prune backup directory ${path.dirname(backupFilePath)}: ${pruneError.message}`);

    const [delError] = await safe(backups.del(backup.id));
    if (delError) debug(`removeBackup: error removing ${backup.id} from database. %o`, delError);
    else debug(`removeBackup: removed ${backup.remotePath}`);
}

// Applies the retention policy to app backups, one app at a time, and removes the backups that
// are not kept. Returns the remote paths of the backups it attempted to remove.
async function cleanupAppBackups(backupConfig, retention, referencedBackupIds, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof retention, 'object');
    assert(Array.isArray(referencedBackupIds));
    assert.strictEqual(typeof progressCallback, 'function');

    const removedAppBackupPaths = [];

    const allApps = await apps.list();
    const allAppIds = allApps.map(a => a.id);

    const appBackups = await backups.getByTypePaged(backups.BACKUP_TYPE_APP, 1, 1000);

    // collate the backups by app id. note that the app could already have been uninstalled
    const appBackupsById = {};
    for (const appBackup of appBackups) {
        if (!appBackupsById[appBackup.identifier]) appBackupsById[appBackup.identifier] = [];
        appBackupsById[appBackup.identifier].push(appBackup);
    }

    // apply backup policy per app. keep latest backup only for existing apps
    let appBackupsToRemove = [];
    for (const appId of Object.keys(appBackupsById)) {
        const appRetention = Object.assign({ keepLatest: allAppIds.includes(appId) }, retention);
        debug(`cleanupAppBackups: applying retention for appId ${appId} retention: ${JSON.stringify(appRetention)}`);
        applyBackupRetention(appBackupsById[appId], appRetention, referencedBackupIds);
        appBackupsToRemove = appBackupsToRemove.concat(appBackupsById[appId].filter(b => !b.keepReason));
    }

    for (const appBackup of appBackupsToRemove) {
        await progressCallback({ message: `Removing app backup (${appBackup.identifier}): ${appBackup.id}`});
        removedAppBackupPaths.push(appBackup.remotePath);
        await removeBackup(backupConfig, appBackup, progressCallback); // never errors
    }

    debug('cleanupAppBackups: done');

    return removedAppBackupPaths;
}

// Applies the retention policy (always keeping the latest) to mail backups and removes the ones
// that are not kept. Returns the remote paths of the backups it attempted to remove.
async function cleanupMailBackups(backupConfig, retention, referencedBackupIds, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof retention, 'object');
    assert(Array.isArray(referencedBackupIds));
    assert.strictEqual(typeof progressCallback, 'function');

    const removedMailBackupPaths = [];

    const mailBackups = await backups.getByTypePaged(backups.BACKUP_TYPE_MAIL, 1, 1000);

    applyBackupRetention(mailBackups, Object.assign({ keepLatest: true }, retention), referencedBackupIds);

    for (const mailBackup of mailBackups) {
        if (mailBackup.keepReason) continue;
        await progressCallback({ message: `Removing mail backup ${mailBackup.remotePath}`});
        removedMailBackupPaths.push(mailBackup.remotePath);
        await removeBackup(backupConfig, mailBackup, progressCallback); // never errors
    }

    debug('cleanupMailBackups: done');

    return removedMailBackupPaths;
}

// Applies the retention policy (always keeping the latest) to box backups and removes the ones
// that are not kept. Returns { removedBoxBackupPaths, referencedBackupIds } where
// referencedBackupIds is the concatenation of dependsOn of every box backup that was kept.
async function cleanupBoxBackups(backupConfig, retention, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof retention, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    let referencedBackupIds = [];
    const removedBoxBackupPaths = [];

    const boxBackups = await backups.getByTypePaged(backups.BACKUP_TYPE_BOX, 1, 1000);

    applyBackupRetention(boxBackups, Object.assign({ keepLatest: true }, retention), [] /* references */);

    for (const boxBackup of boxBackups) {
        if (boxBackup.keepReason) {
            referencedBackupIds = referencedBackupIds.concat(boxBackup.dependsOn);
            continue;
        }

        await progressCallback({ message: `Removing box backup ${boxBackup.remotePath}`});

        removedBoxBackupPaths.push(boxBackup.remotePath);
        await removeBackup(backupConfig, boxBackup, progressCallback);
    }

    debug('cleanupBoxBackups: done');

    return { removedBoxBackupPaths, referencedBackupIds };
}

// cleans up the database by checking if backup exists in the remote. this can happen if user had set some bucket policy
// Returns the remote paths of the backups that were found missing. Returns an empty array without
// doing anything when constants.TEST is set.
async function cleanupMissingBackups(backupConfig, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const perPage = 1000;
    const missingBackupPaths = [];

    if (constants.TEST) return missingBackupPaths;

    // first pass: only collect. deleting rows while paging would shift the remaining rows across
    // page boundaries and entries would get skipped (assuming backups.list pages by offset)
    const missingBackups = [];
    let page = 1, result = [];
    do {
        result = await backups.list(page, perPage);

        for (const backup of result) {
            if (backup.state !== backups.BACKUP_STATE_NORMAL) continue; // note: errored and incomplete backups are cleaned up by the backup retention logic

            let backupFilePath = backupFormat.api(backup.format).getBackupFilePath(backupConfig, backup.remotePath);
            if (backup.format === 'rsync') backupFilePath = backupFilePath + '/'; // add trailing slash to indicate directory

            const [existsError, exists] = await safe(storage.api(backupConfig.provider).exists(backupConfig, backupFilePath));
            if (existsError || exists) continue; // when in doubt (check failed), keep the database entry

            missingBackups.push(backup);
        }

        ++ page;
    } while (result.length === perPage);

    // second pass: remove the stale entries from the database
    for (const backup of missingBackups) {
        await progressCallback({ message: `Removing missing backup ${backup.remotePath}`});

        const [delError] = await safe(backups.del(backup.id));
        if (delError) debug(`cleanupMissingBackups: error removing ${backup.id} from database. %o`, delError);

        missingBackupPaths.push(backup.remotePath);
    }

    debug('cleanupMissingBackups: done');

    return missingBackupPaths;
}

// removes the snapshots of apps that have been uninstalled
// Does nothing if the snapshot info file cannot be read or parsed. Storage and database errors
// are passed to debug via safe() and do not abort the cleanup.
async function cleanupSnapshots(backupConfig) {
    assert.strictEqual(typeof backupConfig, 'object');

    const contents = safe.fs.readFileSync(paths.SNAPSHOT_INFO_FILE, 'utf8');
    const info = safe.JSON.parse(contents);
    if (!info) return;

    const progressCallback = (progress) => { debug(`cleanupSnapshots: ${progress.message}`); };

    for (const appId of Object.keys(info)) {
        if (appId === 'box' || appId === 'mail') continue; // only app snapshots are handled here

        const app = await apps.get(appId);
        if (app) continue; // app is still installed

        const snapshotPath = backupFormat.api(info[appId].format).getBackupFilePath(backupConfig, `snapshot/app_${appId}`);

        if (info[appId].format ==='tgz') {
            await safe(storage.api(backupConfig.provider).remove(backupConfig, snapshotPath), { debug });
        } else {
            await safe(storage.api(backupConfig.provider).removeDir(backupConfig, snapshotPath, progressCallback), { debug });
        }

        safe.fs.unlinkSync(path.join(paths.BACKUP_INFO_DIR, `${appId}.sync.cache`));
        safe.fs.unlinkSync(path.join(paths.BACKUP_INFO_DIR, `${appId}.sync.cache.new`));

        await safe(backups.setSnapshotInfo(appId, null /* info */), { debug });
        debug(`cleanupSnapshots: cleaned up snapshot of app ${appId}`);
    }

    debug('cleanupSnapshots: done');
}

// Entry point of the cleaner. Cleans box, mail and app backups (in that order) per the retention
// policy, then removes database entries of backups missing in the storage backend and finally
// the snapshots of uninstalled apps.
// Throws BoxError.MOUNT_ERROR if the backup endpoint is not mounted. Returns {} without cleaning
// anything when retention.keepWithinSecs is negative, otherwise the lists of removed paths.
async function run(progressCallback) {
    assert.strictEqual(typeof progressCallback, 'function');

    const backupConfig = await backups.getConfig();
    const { retention } = await backups.getPolicy();
    debug(`run: retention is ${JSON.stringify(retention)}`);

    const status = await backups.ensureMounted();
    debug(`run: mount point status is ${JSON.stringify(status)}`);
    if (status.state !== 'active') throw new BoxError(BoxError.MOUNT_ERROR, `Backup endpoint is not mounted: ${status.message}`);

    if (retention.keepWithinSecs < 0) {
        debug('run: keeping all backups');
        return {};
    }

    await progressCallback({ percent: 10, message: 'Cleaning box backups' });
    const { removedBoxBackupPaths, referencedBackupIds } = await cleanupBoxBackups(backupConfig, retention, progressCallback); // references is app or mail backup ids

    await progressCallback({ percent: 20, message: 'Cleaning mail backups' });
    const removedMailBackupPaths = await cleanupMailBackups(backupConfig, retention, referencedBackupIds, progressCallback);

    await progressCallback({ percent: 40, message: 'Cleaning app backups' });
    const archivedBackupIds = await archives.listBackupIds();
    const removedAppBackupPaths = await cleanupAppBackups(backupConfig, retention, referencedBackupIds.concat(archivedBackupIds), progressCallback);

    await progressCallback({ percent: 70, message: 'Checking storage backend and removing stale entries in database' });
    const missingBackupPaths = await cleanupMissingBackups(backupConfig, progressCallback);

    await progressCallback({ percent: 90, message: 'Cleaning snapshots' });
    await cleanupSnapshots(backupConfig);

    return { removedBoxBackupPaths, removedMailBackupPaths, removedAppBackupPaths, missingBackupPaths };
}