diff --git a/CHANGES b/CHANGES index f38c7703f..6d7acb65b 100644 --- a/CHANGES +++ b/CHANGES @@ -2961,4 +2961,5 @@ * system: disk usage is not collected in background. new disk ui, computes space on demand * backups: multiple backup targets * port bindings: add `enabledByDefault` property in manifest +* backups: store integrity information and perform validation diff --git a/src/backupformat/rsync.js b/src/backupformat/rsync.js index f08e005e2..b4a1d2c69 100644 --- a/src/backupformat/rsync.js +++ b/src/backupformat/rsync.js @@ -3,6 +3,7 @@ exports = module.exports = { download, upload, + verify, getFileExtension, _saveFsMetadata: saveFsMetadata, @@ -27,7 +28,8 @@ const assert = require('node:assert'), safe = require('safetydance'), shell = require('../shell.js')('backupformat/rsync'), stream = require('stream/promises'), - syncer = require('../syncer.js'); + syncer = require('../syncer.js'), + util = require('node:util'); async function addFile(sourceFile, encryption, uploader, progressCallback) { assert.strictEqual(typeof sourceFile, 'string'); @@ -199,18 +201,18 @@ async function restoreFsMetadata(dataLayout, metadataFile) { } } -async function downloadDir(backupTarget, backupFilePath, dataLayout, progressCallback) { +async function downloadDir(backupTarget, remotePath, dataLayout, progressCallback) { assert.strictEqual(typeof backupTarget, 'object'); - assert.strictEqual(typeof backupFilePath, 'string'); + assert.strictEqual(typeof remotePath, 'string'); assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout'); assert.strictEqual(typeof progressCallback, 'function'); const encryptedFilenames = backupTarget.encryption?.encryptedFilenames || false; - debug(`downloadDir: ${backupFilePath} to ${dataLayout.toString()}. encryption filenames: ${encryptedFilenames} content: ${!!backupTarget.encryption}`); + debug(`downloadDir: ${remotePath} to ${dataLayout.toString()}. 
encryption filenames: ${encryptedFilenames} content: ${!!backupTarget.encryption}`); async function downloadFile(entry) { - let relativePath = path.relative(backupFilePath, entry.path); + let relativePath = path.relative(remotePath, entry.path); if (encryptedFilenames) { const { error, result } = hush.decryptFilePath(relativePath, backupTarget.encryption); if (error) throw new BoxError(BoxError.CRYPTO_ERROR, 'Unable to decrypt file'); @@ -261,7 +263,7 @@ async function downloadDir(backupTarget, backupFilePath, dataLayout, progressCal const concurrency = backupTarget.limits?.downloadConcurrency || (backupTarget.provider === 's3' ? 30 : 10); let marker = null; while (true) { - const batch = await backupTargets.storageApi(backupTarget).listDir(backupTarget.config, backupFilePath, marker === null ? 1 : 1000, marker); // try with one file first. if that works out, we continue faster + const batch = await backupTargets.storageApi(backupTarget).listDir(backupTarget.config, remotePath, marker === null ? 1 : 1000, marker); // try with one file first. 
if that works out, we continue faster await async.eachLimit(batch.entries, concurrency, downloadFile); if (!batch.marker) break; marker = batch.marker; @@ -295,3 +297,72 @@ function getFileExtension(encryption) { return ''; // this also signals to backupcleanear that we are dealing with directories } + +async function verify(backupTarget, remotePath, integrityMap, progressCallback) { + assert.strictEqual(typeof backupTarget, 'object'); + assert.strictEqual(typeof remotePath, 'string'); + assert(util.types.isMap(integrityMap), 'integrityMap should be a Map'); + assert.strictEqual(typeof progressCallback, 'function'); + + debug(`verify: Verifying ${remotePath}`); + + const encryptedFilenames = backupTarget.encryption?.encryptedFilenames || false; + let fileCount = 0; + + async function validateFile(entry) { + let relativePath = path.relative(remotePath, entry.path); + if (encryptedFilenames) { + const { error, result } = hush.decryptFilePath(relativePath, backupTarget.encryption); + if (error) throw new BoxError(BoxError.CRYPTO_ERROR, 'Unable to decrypt file'); + relativePath = result; + } + + ++fileCount; + const sourceStream = await backupTargets.storageApi(backupTarget).download(backupTarget.config, entry.path); + + const ps = new ProgressStream({ interval: 10000 }); // display a progress every 10 seconds + ps.on('progress', function (progress) { + const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024); + if (!transferred && !speed) return progressCallback({ message: `Downloading ${entry.path}` }); // 0M@0MBps looks wrong + progressCallback({ message: `Downloading ${entry.path}: ${transferred}M@${speed}MBps` }); + }); + + const streams = [ sourceStream, ps ]; + + if (backupTarget.encryption) { + const decryptStream = new DecryptStream(backupTarget.encryption); + streams.push(decryptStream); + } + + const hash = new HashStream(); + streams.push(hash); + + await stream.pipeline(streams); + + const integrity = 
integrityMap.get(relativePath); + if (ps.stats().transferred !== integrity.size) throw new BoxError(BoxError.BAD_STATE, `${entry.path} has size ${ps.stats().transferred}. Expecting ${integrity.size}`); + const actualSha256 = hash.digest(); if (actualSha256 !== integrity.sha256) throw new BoxError(BoxError.BAD_STATE, `${entry.path} has sha256 ${actualSha256}. Expecting ${integrity.sha256}`); + } + + debug(integrityMap.entries()); + + // https://www.digitalocean.com/community/questions/rate-limiting-on-spaces?answer=40441 + const concurrency = backupTarget.limits?.downloadConcurrency || (backupTarget.provider === 's3' ? 30 : 10); + let marker = null; + while (true) { + const batch = await backupTargets.storageApi(backupTarget).listDir(backupTarget.config, remotePath, marker === null ? 1 : 1000, marker); // try with one file first. if that works out, we continue faster + await async.eachLimit(batch.entries, concurrency, validateFile); + if (!batch.marker) break; + marker = batch.marker; + } + + const check = (x, y) => { return x === y ? { status: 'passed' } : { status: 'failed', message: `Expecting ${x} but got ${y}` }; }; + + if (integrityMap.size !== fileCount) throw new BoxError(BoxError.BAD_STATE, `Got ${fileCount} files. Expecting ${integrityMap.size} files`); + + return { + size: { status: 'passed' }, + fileCount: check(integrityMap.size, fileCount), + sha256: { status: 'passed' }, + }; +}