From ae3a34287a51c633a42bbdd7ef824d1c104250b4 Mon Sep 17 00:00:00 2001 From: Girish Ramakrishnan Date: Wed, 30 Jul 2025 11:19:07 +0200 Subject: [PATCH] backup target: create snapshot and cache files per target snapshot file tracks the snapshot directory. when an app gets deleted, the cleaner will remove the upstream snapshot directory when it runs. cache files are used in the rsync logic to track what was uploaded into the snapshot in the previous run without needing to rescan upstream. --- ...250724102340-backupTargets-create-table.js | 14 +++- src/backupcleaner.js | 32 +++----- src/backupformat/rsync.js | 4 +- src/backuptargets.js | 82 +++++++++++-------- src/backuptask.js | 10 +-- src/constants.js | 2 + src/paths.js | 3 +- src/syncer.js | 7 +- src/test/syncer-test.js | 4 +- 9 files changed, 87 insertions(+), 71 deletions(-) diff --git a/migrations/20250724102340-backupTargets-create-table.js b/migrations/20250724102340-backupTargets-create-table.js index 2bbaf2542..ec0055252 100644 --- a/migrations/20250724102340-backupTargets-create-table.js +++ b/migrations/20250724102340-backupTargets-create-table.js @@ -1,6 +1,9 @@ 'use strict'; -const crypto = require('crypto'), +const child_process = require('child_process'), + crypto = require('crypto'), + fs = require('fs'), + path = require('path'), paths = require('../src/paths.js'); exports.up = async function (db) { @@ -57,8 +60,15 @@ exports.up = async function (db) { } await db.runSql('START TRANSACTION'); + const id = `bc-${crypto.randomUUID()}`; + + const targetInfoDir = path.join(paths.BACKUP_INFO_DIR, id); + console.log(`Moving existing cache and snapshot file into ${targetInfoDir}`); + fs.mkdirSync(targetInfoDir, { recursive: true }); + child_process.execSync(`find ${paths.BACKUP_INFO_DIR}/ -maxdepth 1 -type f -exec mv -t ${targetInfoDir}/ {} +`); + await db.runSql('INSERT INTO backupTargets (id, label, provider, configJson, limitsJson, retentionJson, schedule, encryptionJson, format, main) VALUES (?, ?, ?, ?, ?, ?, ?, 
?, ?, ?)', - [ `bc-${crypto.randomUUID()}`, label, provider, JSON.stringify(config), JSON.stringify(limits), JSON.stringify(retention), schedule, JSON.stringify(encryption), format, main ]); + [ id, label, provider, JSON.stringify(config), JSON.stringify(limits), JSON.stringify(retention), schedule, JSON.stringify(encryption), format, main ]); await db.runSql('DELETE FROM settings WHERE name=? OR name=? OR name=?', [ 'backup_storage', 'backup_limits', 'backup_policy' ]); await db.runSql('COMMIT'); diff --git a/src/backupcleaner.js b/src/backupcleaner.js index b11cb0326..865768ab6 100644 --- a/src/backupcleaner.js +++ b/src/backupcleaner.js @@ -18,7 +18,6 @@ const apps = require('./apps.js'), debug = require('debug')('box:backupcleaner'), moment = require('moment'), path = require('path'), - paths = require('./paths.js'), safe = require('safetydance'), storage = require('./storage.js'); @@ -243,35 +242,30 @@ async function cleanupMissingBackups(target, progressCallback) { } // removes the snapshots of apps that have been uninstalled -async function cleanupSnapshots(backupTarget) { +async function removeOldAppSnapshots(backupTarget) { assert.strictEqual(typeof backupTarget, 'object'); - const contents = safe.fs.readFileSync(paths.SNAPSHOT_INFO_FILE, 'utf8'); - const info = safe.JSON.parse(contents); - if (!info) return; + const snapshotInfo = await backupTargets.getSnapshotInfo(backupTarget); - const progressCallback = (progress) => { debug(`cleanupSnapshots: ${progress.message}`); }; + const progressCallback = (progress) => { debug(`removeOldAppSnapshots: ${progress.message}`); }; - for (const appId of Object.keys(info)) { + for (const appId of Object.keys(snapshotInfo)) { if (appId === 'box' || appId === 'mail') continue; const app = await apps.get(appId); - if (app) continue; // app is still installed + if (app !== null) continue; // app is still installed - if (info[appId].format ==='tgz') { - await 
safe(storage.api(backupTarget.provider).remove(backupTarget.config, backupFormat.api(info[appId].format).getBackupFilePath(backupTarget, `snapshot/app_${appId}`)), { debug }); + if (snapshotInfo[appId].format ==='tgz') { + await safe(storage.api(backupTarget.provider).remove(backupTarget.config, backupFormat.api(snapshotInfo[appId].format).getBackupFilePath(backupTarget, `snapshot/app_${appId}`)), { debug }); } else { - await safe(storage.api(backupTarget.provider).removeDir(backupTarget.config, backupFormat.api(info[appId].format).getBackupFilePath(backupTarget, `snapshot/app_${appId}`), progressCallback), { debug }); + await safe(storage.api(backupTarget.provider).removeDir(backupTarget.config, backupFormat.api(snapshotInfo[appId].format).getBackupFilePath(backupTarget, `snapshot/app_${appId}`), progressCallback), { debug }); } - safe.fs.unlinkSync(path.join(paths.BACKUP_INFO_DIR, `${appId}.sync.cache`)); - safe.fs.unlinkSync(path.join(paths.BACKUP_INFO_DIR, `${appId}.sync.cache.new`)); - - await safe(backupTargets.setSnapshotInfo(appId, null /* info */), { debug }); - debug(`cleanupSnapshots: cleaned up snapshot of app ${appId}`); + await backupTargets.setSnapshotInfo(backupTarget, appId, null /* info */); + debug(`removeOldAppSnapshots: removed snapshot of app ${appId}`); } - debug('cleanupSnapshots: done'); + debug('removeOldAppSnapshots: done'); } async function run(targetId, progressCallback) { @@ -305,8 +299,8 @@ async function run(targetId, progressCallback) { await progressCallback({ percent: 70, message: 'Checking storage backend and removing stale entries in database' }); const missingBackupPaths = await cleanupMissingBackups(backupTarget, progressCallback); - await progressCallback({ percent: 80, message: 'Cleaning snapshots' }); - await cleanupSnapshots(backupTarget); + await progressCallback({ percent: 80, message: 'Removing snapshots of uninstalled apps' }); + await removeOldAppSnapshots(backupTarget); await progressCallback({ percent: 80, message: 
'Cleaning storage artifacts' }); await storage.api(backupTarget.provider).cleanup(backupTarget.config, progressCallback); diff --git a/src/backupformat/rsync.js b/src/backupformat/rsync.js index 33d45657b..1c87e2e6b 100644 --- a/src/backupformat/rsync.js +++ b/src/backupformat/rsync.js @@ -19,6 +19,7 @@ const assert = require('assert'), fs = require('fs'), hush = require('../hush.js'), path = require('path'), + paths = require('../paths.js'), ProgressStream = require('../progress-stream.js'), promiseRetry = require('../promise-retry.js'), safe = require('safetydance'), @@ -109,7 +110,8 @@ async function sync(backupTarget, remotePath, dataLayout, progressCallback) { // the number here has to take into account the s3.upload partSize (which is 10MB). So 20=200MB const concurrency = backupTarget.limits?.syncConcurrency || (backupTarget.provider === 's3' ? 20 : 10); - const changes = await syncer.sync(dataLayout); + const cacheFile = path.join(paths.BACKUP_INFO_DIR, backupTarget.id, `${dataLayout.getBasename()}.sync.cache`); + const changes = await syncer.sync(dataLayout, { cacheFile }); debug(`sync: processing ${changes.delQueue.length} deletes and ${changes.addQueue.length} additions`); const [delError] = await safe(async.eachLimit(changes.delQueue, concurrency, async (change) => await processSyncerChange(change, backupTarget, remotePath, dataLayout, progressCallback))); diff --git a/src/backuptargets.js b/src/backuptargets.js index 03d524b0f..6e3aef668 100644 --- a/src/backuptargets.js +++ b/src/backuptargets.js @@ -18,7 +18,6 @@ exports = module.exports = { startBackupTask, startCleanupTask, - cleanupCacheFilesSync, getSnapshotInfo, setSnapshotInfo, @@ -249,28 +248,28 @@ async function setPrimary(backupTarget, auditSource) { await eventlog.add(eventlog.ACTION_BACKUP_TARGET_UPDATE, auditSource, { backupTarget, primary: true }); } -async function del(target, auditSource) { - assert.strictEqual(typeof target, 'object'); +async function del(backupTarget, auditSource) { 
+ assert.strictEqual(typeof backupTarget, 'object'); assert.strictEqual(typeof auditSource, 'object'); - if (target.primary) throw new BoxError(BoxError.CONFLICT, 'Cannot delete the primary backup target'); + if (backupTarget.primary) throw new BoxError(BoxError.CONFLICT, 'Cannot delete the primary backup target'); const queries = [ - { query: 'DELETE FROM backups WHERE targetId = ?', args: [ target.id ] }, - { query: 'DELETE FROM backupTargets WHERE id=? AND main=?', args: [ target.id, false ] }, // cannot delete primary + { query: 'DELETE FROM backups WHERE targetId = ?', args: [ backupTarget.id ] }, + { query: 'DELETE FROM backupTargets WHERE id=? AND main=?', args: [ backupTarget.id, false ] }, // cannot delete primary ]; const [error, result] = await safe(database.transaction(queries)); if (error && error.code === 'ER_NO_REFERENCED_ROW_2') throw new BoxError(BoxError.NOT_FOUND, error); if (error) throw error; if (result[1].affectedRows !== 1) throw new BoxError(BoxError.NOT_FOUND, 'Target not found'); - await eventlog.add(eventlog.ACTION_BACKUP_TARGET_REMOVE, auditSource, { backupTarget: target }); + await eventlog.add(eventlog.ACTION_BACKUP_TARGET_REMOVE, auditSource, { backupTarget: backupTarget }); - target.schedule = constants.CRON_PATTERN_NEVER; - await cron.handleBackupScheduleChanged(target); + backupTarget.schedule = constants.CRON_PATTERN_NEVER; + await cron.handleBackupScheduleChanged(backupTarget); - debug('del: clearing backup cache'); - cleanupCacheFilesSync(target); + const infoDir = path.join(paths.BACKUP_INFO_DIR, backupTarget.id); + safe.fs.rmdirSync(infoDir, { recursive: true }); } async function startBackupTask(target, auditSource) { @@ -303,38 +302,47 @@ async function startBackupTask(target, auditSource) { return taskId; } -// this function is used in migrations - 20200512172301-settings-backup-encryption.js -function cleanupCacheFilesSync(target) { - const files = safe.fs.readdirSync(path.join(paths.BACKUP_INFO_DIR, target.id)); - if 
(!files) return; +async function removeCacheFiles(backupTarget) { + assert.strictEqual(typeof backupTarget, 'object'); - files - .filter(function (f) { return f.endsWith('.sync.cache'); }) - .forEach(function (f) { - safe.fs.unlinkSync(path.join(paths.BACKUP_INFO_DIR, f)); - }); -} - -function getSnapshotInfo(id) { - assert.strictEqual(typeof id, 'string'); - - const contents = safe.fs.readFileSync(paths.SNAPSHOT_INFO_FILE, 'utf8'); - const info = safe.JSON.parse(contents); - if (!info) return { }; - return info[id] || { }; + const infoDir = path.join(paths.BACKUP_INFO_DIR, backupTarget.id); + const files = safe.fs.readdirSync(infoDir); + if (!files) throw new BoxError(BoxError.FS_ERROR, `Unable to access ${infoDir}: ${safe.error.message}`); + for (const f of files) { + if (!f.endsWith('.sync.cache')) continue; + safe.fs.unlinkSync(path.join(infoDir, f)); + } } // keeps track of contents of the snapshot directory. this provides a way to clean up backups of uninstalled apps -async function setSnapshotInfo(id, info) { - assert.strictEqual(typeof id, 'string'); +async function getSnapshotInfo(backupTarget) { + assert.strictEqual(typeof backupTarget, 'object'); + + const snapshotFilePath = path.join(paths.BACKUP_INFO_DIR, backupTarget.id, constants.SNAPSHOT_INFO_FILENAME); + const contents = safe.fs.readFileSync(snapshotFilePath, 'utf8'); + const info = safe.JSON.parse(contents); + return info || {}; +} + +// keeps track of contents of the snapshot directory. 
this provides a way to clean up backups of uninstalled apps +async function setSnapshotInfo(backupTarget, id, info) { + assert.strictEqual(typeof backupTarget, 'object'); + assert.strictEqual(typeof id, 'string'); // 'box', 'mail' or appId assert.strictEqual(typeof info, 'object'); - const contents = safe.fs.readFileSync(paths.SNAPSHOT_INFO_FILE, 'utf8'); - const data = safe.JSON.parse(contents) || { }; + const infoDir = path.join(paths.BACKUP_INFO_DIR, backupTarget.id); + const snapshotFilePath = path.join(infoDir, constants.SNAPSHOT_INFO_FILENAME); + const contents = safe.fs.readFileSync(snapshotFilePath, 'utf8'); + const data = safe.JSON.parse(contents) || {}; if (info) data[id] = info; else delete data[id]; - if (!safe.fs.writeFileSync(paths.SNAPSHOT_INFO_FILE, JSON.stringify(data, null, 4), 'utf8')) { + if (!safe.fs.writeFileSync(snapshotFilePath, JSON.stringify(data, null, 4), 'utf8')) { throw new BoxError(BoxError.FS_ERROR, safe.error.message); } + + if (!info) { // unlink the cache files + safe.fs.unlinkSync(path.join(infoDir, `${id}.sync.cache`)); + safe.fs.unlinkSync(path.join(infoDir, `${id}.sync.cache.new`)); + } } async function startCleanupTask(backupTarget, auditSource) { @@ -422,7 +430,9 @@ async function setConfig(backupTarget, newConfig, auditSource) { await storage.setupManagedMount(backupTarget.provider, newConfig, paths.MANAGED_BACKUP_MOUNT_DIR); debug('setConfig: clearing backup cache'); - cleanupCacheFilesSync(backupTarget); + // FIXME: this cleans up the cache files in case the bucket or the prefix changes and the destination already has something there + // however, this will also resync if just the credentials change + await removeCacheFiles(backupTarget); await update(backupTarget, { config: newConfig }); @@ -461,6 +471,8 @@ async function add(data, auditSource) { await storage.setupManagedMount(provider, config, paths.MANAGED_BACKUP_MOUNT_DIR); const id = `bc-${crypto.randomUUID()}`; + if 
(!safe.fs.mkdirSync(`${paths.BACKUP_INFO_DIR}/${id}`)) throw new BoxError(BoxError.FS_ERROR, `Failed to create info dir: ${safe.error.message}`); + await database.query('INSERT INTO backupTargets (id, label, provider, configJson, limitsJson, retentionJson, schedule, encryptionJson, format, main) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [ id, label, provider, JSON.stringify(config), JSON.stringify(limits), JSON.stringify(retention), schedule, JSON.stringify(encryption), format, false ]); diff --git a/src/backuptask.js b/src/backuptask.js index d5a1042e9..d5118a622 100644 --- a/src/backuptask.js +++ b/src/backuptask.js @@ -198,7 +198,7 @@ async function uploadBoxSnapshot(backupTarget, progressCallback) { debug(`uploadBoxSnapshot: took ${(new Date() - startTime)/1000} seconds`); - await backupTargets.setSnapshotInfo('box', { timestamp: new Date().toISOString(), format: backupTarget.format }); + await backupTargets.setSnapshotInfo(backupTarget, 'box', { timestamp: new Date().toISOString() }); } async function copy(backupTarget, srcRemotePath, destRemotePath, progressCallback) { @@ -272,9 +272,7 @@ async function rotateAppBackup(backupTarget, app, tag, options, progressCallback assert.strictEqual(typeof options, 'object'); assert.strictEqual(typeof progressCallback, 'function'); - const snapshotInfo = backupTargets.getSnapshotInfo(app.id); - - const manifest = snapshotInfo.restoreConfig ? snapshotInfo.restoreConfig.manifest : snapshotInfo.manifest; // compat + const manifest = app.manifest; const remotePath = `${tag}/app_${app.fqdn}_v${manifest.version}`; debug(`rotateAppBackup: rotating ${app.fqdn} to path ${remotePath}`); @@ -362,7 +360,7 @@ async function uploadAppSnapshot(backupTarget, app, progressCallback) { debug(`uploadAppSnapshot: ${app.fqdn} uploaded to ${remotePath}. 
${(new Date() - startTime)/1000} seconds`); - await backupTargets.setSnapshotInfo(app.id, { timestamp: new Date().toISOString(), manifest: app.manifest, format: backupTarget.format }); + await backupTargets.setSnapshotInfo(backupTarget, app.id, { timestamp: new Date().toISOString(), manifest: app.manifest }); } async function backupAppWithTag(app, backupTarget, tag, options, progressCallback) { @@ -404,7 +402,7 @@ async function uploadMailSnapshot(backupTarget, progressCallback) { debug(`uploadMailSnapshot: took ${(new Date() - startTime)/1000} seconds`); - await backupTargets.setSnapshotInfo('mail', { timestamp: new Date().toISOString(), format: backupTarget.format }); + await backupTargets.setSnapshotInfo(backupTarget, 'mail', { timestamp: new Date().toISOString() }); } async function rotateMailBackup(backupTarget, tag, options, progressCallback) { diff --git a/src/constants.js b/src/constants.js index 746e7ab0f..9a74b0a57 100644 --- a/src/constants.js +++ b/src/constants.js @@ -80,6 +80,8 @@ exports = module.exports = { SECRET_PLACEHOLDER: String.fromCharCode(0x25CF).repeat(8), // also used in dashboard client.js + SNAPSHOT_INFO_FILENAME: 'snapshot-info.json', + CLOUDRON, TEST, diff --git a/src/paths.js b/src/paths.js index e0b85e343..4eee34137 100644 --- a/src/paths.js +++ b/src/paths.js @@ -41,11 +41,10 @@ exports = module.exports = { NGINX_CONFIG_DIR: path.join(baseDir(), 'platformdata/nginx'), NGINX_APPCONFIG_DIR: path.join(baseDir(), 'platformdata/nginx/applications'), NGINX_CERT_DIR: path.join(baseDir(), 'platformdata/nginx/cert'), - BACKUP_INFO_DIR: path.join(baseDir(), 'platformdata/backup'), + BACKUP_INFO_DIR: path.join(baseDir(), 'platformdata/backup'), // contains /{backupTarget.id}/ UPDATE_DIR: path.join(baseDir(), 'platformdata/update'), BOX_UPDATE_FILE: path.join(baseDir(), 'platformdata/update/boxupdate.json'), DISK_USAGE_EXCLUDE_FILE: path.join(baseDir(), 'platformdata/diskusage/exclude'), - SNAPSHOT_INFO_FILE: path.join(baseDir(), 
'platformdata/backup/snapshot-info.json'), DYNDNS_INFO_FILE: path.join(baseDir(), 'platformdata/dyndns-info.json'), DHPARAMS_FILE: path.join(baseDir(), 'platformdata/dhparams.pem'), FEATURES_INFO_FILE: path.join(baseDir(), 'platformdata/features-info.json'), diff --git a/src/syncer.js b/src/syncer.js index 374874092..4f882b4af 100644 --- a/src/syncer.js +++ b/src/syncer.js @@ -6,7 +6,6 @@ const assert = require('assert'), debug = require('debug')('box:syncer'), fs = require('fs'), path = require('path'), - paths = require('./paths.js'), safe = require('safetydance'); exports = module.exports = { @@ -63,14 +62,14 @@ function ISFILE(x) { return (x & fs.constants.S_IFREG) === fs.constants.S_IFREG; } -async function sync(dataLayout) { +async function sync(dataLayout, options) { assert(dataLayout instanceof DataLayout, 'Expecting dataLayout to be a DataLayout'); + assert.strictEqual(typeof options, 'object'); const addQueue = [], delQueue = []; // separate queues. we have to process the del first and then the add let curCacheIndex = 0; - const cacheFile = path.join(paths.BACKUP_INFO_DIR, dataLayout.getBasename() + '.sync.cache'), - newCacheFile = path.join(paths.BACKUP_INFO_DIR, dataLayout.getBasename() + '.sync.cache.new'); + const cacheFile = options.cacheFile, newCacheFile = `${options.cacheFile}.new`; let cache = []; diff --git a/src/test/syncer-test.js b/src/test/syncer-test.js index 8421f8ee9..2fa27d1ec 100644 --- a/src/test/syncer-test.js +++ b/src/test/syncer-test.js @@ -16,7 +16,7 @@ const createTree = require('./common.js').createTree, syncer = require('../syncer.js'); const gTmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'syncer-test')), - gCacheFile = path.join(paths.BACKUP_INFO_DIR, path.basename(gTmpDir) + '.sync.cache'); + gCacheFile = path.join(paths.BACKUP_INFO_DIR, 'syncer-test.sync.cache'); describe('Syncer', function () { before(function () { @@ -24,7 +24,7 @@ describe('Syncer', function () { }); async function getChanges(dataLayout) { - const 
changes = await syncer.sync(dataLayout); + const changes = await syncer.sync(dataLayout, { cacheFile: gCacheFile }); syncer.finalize(changes); return changes.delQueue.concat(changes.addQueue); }