rsync: integrity check

This commit is contained in:
Girish Ramakrishnan
2025-08-15 21:35:34 +05:30
parent 832a25601d
commit 20b75b4065
2 changed files with 78 additions and 6 deletions

View File

@@ -3,6 +3,7 @@
exports = module.exports = {
download,
upload,
verify,
getFileExtension,
_saveFsMetadata: saveFsMetadata,
@@ -27,7 +28,8 @@ const assert = require('node:assert'),
safe = require('safetydance'),
shell = require('../shell.js')('backupformat/rsync'),
stream = require('stream/promises'),
syncer = require('../syncer.js');
syncer = require('../syncer.js'),
util = require('node:util');
async function addFile(sourceFile, encryption, uploader, progressCallback) {
assert.strictEqual(typeof sourceFile, 'string');
@@ -199,18 +201,18 @@ async function restoreFsMetadata(dataLayout, metadataFile) {
}
}
async function downloadDir(backupTarget, backupFilePath, dataLayout, progressCallback) {
async function downloadDir(backupTarget, remotePath, dataLayout, progressCallback) {
assert.strictEqual(typeof backupTarget, 'object');
assert.strictEqual(typeof backupFilePath, 'string');
assert.strictEqual(typeof remotePath, 'string');
assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
assert.strictEqual(typeof progressCallback, 'function');
const encryptedFilenames = backupTarget.encryption?.encryptedFilenames || false;
debug(`downloadDir: ${backupFilePath} to ${dataLayout.toString()}. encryption filenames: ${encryptedFilenames} content: ${!!backupTarget.encryption}`);
debug(`downloadDir: ${remotePath} to ${dataLayout.toString()}. encryption filenames: ${encryptedFilenames} content: ${!!backupTarget.encryption}`);
async function downloadFile(entry) {
let relativePath = path.relative(backupFilePath, entry.path);
let relativePath = path.relative(remotePath, entry.path);
if (encryptedFilenames) {
const { error, result } = hush.decryptFilePath(relativePath, backupTarget.encryption);
if (error) throw new BoxError(BoxError.CRYPTO_ERROR, 'Unable to decrypt file');
@@ -261,7 +263,7 @@ async function downloadDir(backupTarget, backupFilePath, dataLayout, progressCal
const concurrency = backupTarget.limits?.downloadConcurrency || (backupTarget.provider === 's3' ? 30 : 10);
let marker = null;
while (true) {
const batch = await backupTargets.storageApi(backupTarget).listDir(backupTarget.config, backupFilePath, marker === null ? 1 : 1000, marker); // try with one file first. if that works out, we continue faster
const batch = await backupTargets.storageApi(backupTarget).listDir(backupTarget.config, remotePath, marker === null ? 1 : 1000, marker); // try with one file first. if that works out, we continue faster
await async.eachLimit(batch.entries, concurrency, downloadFile);
if (!batch.marker) break;
marker = batch.marker;
@@ -295,3 +297,72 @@ function getFileExtension(encryption) {
return ''; // this also signals to backupcleanear that we are dealing with directories
}
// Verifies a remote backup directory by re-downloading every file and comparing
// its byte count and sha256 digest against integrityMap.
// backupTarget: target descriptor ({ config, provider, encryption?, limits? })
// remotePath: remote directory prefix to verify
// integrityMap: Map of relative (decrypted) file path -> { size, sha256 }
// progressCallback: called with { message } while files are downloaded
// Throws BoxError(BAD_STATE) on any mismatch or unexpected/missing file.
// Returns a per-dimension status report ({ size, fileCount, sha256 }).
async function verify(backupTarget, remotePath, integrityMap, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(util.types.isMap(integrityMap), 'integrityMap should be a Map');
    assert.strictEqual(typeof progressCallback, 'function');

    debug(`verify: Verifying ${remotePath}`);

    const encryptedFilenames = backupTarget.encryption?.encryptedFilenames || false;

    let fileCount = 0;

    async function validateFile(entry) {
        let relativePath = path.relative(remotePath, entry.path);
        if (encryptedFilenames) {
            const { error, result } = hush.decryptFilePath(relativePath, backupTarget.encryption);
            if (error) throw new BoxError(BoxError.CRYPTO_ERROR, 'Unable to decrypt file');
            relativePath = result;
        }

        ++fileCount;

        // look up the expected checksum before downloading, so a remote file that is
        // not in the map fails fast with a clear error instead of a TypeError later
        const integrity = integrityMap.get(relativePath);
        if (!integrity) throw new BoxError(BoxError.BAD_STATE, `${entry.path} (${relativePath}) is not in the integrity map`);

        const sourceStream = await backupTargets.storageApi(backupTarget).download(backupTarget.config, entry.path);

        const ps = new ProgressStream({ interval: 10000 }); // display a progress every 10 seconds
        ps.on('progress', function (progress) {
            const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
            if (!transferred && !speed) return progressCallback({ message: `Downloading ${entry.path}` }); // 0M@0MBps looks wrong
            progressCallback({ message: `Downloading ${entry.path}: ${transferred}M@${speed}MBps` });
        });

        const streams = [ sourceStream, ps ];
        if (backupTarget.encryption) {
            const decryptStream = new DecryptStream(backupTarget.encryption);
            streams.push(decryptStream);
        }
        const hash = new HashStream();
        streams.push(hash);

        await stream.pipeline(streams);

        if (ps.stats().transferred !== integrity.size) throw new BoxError(BoxError.BAD_STATE, `${entry.path} has size ${ps.stats().transferred}. Expecting ${integrity.size}`);
        if (hash.digest() !== integrity.sha256) throw new BoxError(BoxError.BAD_STATE, `${entry.path} has sha256 ${hash.digest()}. Expecting ${integrity.sha256}`);
    }

    debug(integrityMap.entries());

    // https://www.digitalocean.com/community/questions/rate-limiting-on-spaces?answer=40441
    const concurrency = backupTarget.limits?.downloadConcurrency || (backupTarget.provider === 's3' ? 30 : 10);

    let marker = null;
    while (true) {
        const batch = await backupTargets.storageApi(backupTarget).listDir(backupTarget.config, remotePath, marker === null ? 1 : 1000, marker); // try with one file first. if that works out, we continue faster
        await async.eachLimit(batch.entries, concurrency, validateFile);
        if (!batch.marker) break;
        marker = batch.marker;
    }

    const check = (x, y) => { return x === y ? { status: 'passed' } : { status: 'failed', message: `Expecting ${x} but got ${y}` }; };

    // every remote file matched its entry; a count mismatch here means files in the
    // map are missing remotely. Map#size is a property, not a method
    if (integrityMap.size !== fileCount) throw new BoxError(BoxError.BAD_STATE, `Got ${fileCount} files. Expecting ${integrityMap.size} files`);

    return {
        size: { status: 'passed' },      // individual size mismatches throw above
        fileCount: check(integrityMap.size, fileCount),
        sha256: { status: 'passed' },    // individual digest mismatches throw above
    };
}