// cloudron-box/src/backupformat/rsync.js
'use strict';
exports = module.exports = {
getBackupFilePath,
download,
upload,
_saveFsMetadata: saveFsMetadata,
_restoreFsMetadata: restoreFsMetadata
};
const assert = require('assert'),
async = require('async'),
BoxError = require('../boxerror.js'),
DataLayout = require('../datalayout.js'),
{ DecryptStream } = require('../hush.js'),
debug = require('debug')('box:backupformat/rsync'),
{ EncryptStream } = require('../hush.js'),
fs = require('fs'),
hush = require('../hush.js'),
path = require('path'),
ProgressStream = require('../progress-stream.js'),
promiseRetry = require('../promise-retry.js'),
safe = require('safetydance'),
shell = require('../shell.js'),
storage = require('../storage.js'),
stream = require('stream/promises'),
syncer = require('../syncer.js'),
util = require('util');
// Resolves a remote path to its full location under the provider's configured root.
// The 'noop' provider has no rootPath, so its remote paths pass through unchanged.
function getBackupFilePath(backupConfig, remotePath) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof remotePath, 'string');

    return backupConfig.provider === 'noop' ? remotePath : path.join(backupConfig.rootPath, remotePath);
}
// Uploads a single local file to the storage backend, optionally encrypting it in transit.
// Files that disappear between snapshot and upload are skipped without error.
async function addFile(sourceFile, encryption, uploader, progressCallback) {
    assert.strictEqual(typeof sourceFile, 'string');
    assert.strictEqual(typeof encryption, 'object');
    assert.strictEqual(typeof uploader, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    // make sure file can be opened for reading before we start the pipeline. otherwise, we end up with
    // destinations dirs/file which are owned by root (this process id) and cannot be copied (run as normal user)
    const [openError, sourceHandle] = await safe(fs.promises.open(sourceFile, 'r'));
    if (openError) {
        debug(`addFile: ignoring disappeared file: ${sourceFile}`);
        return;
    }

    // FileHandle.createReadStream() takes only an options object; it must not be passed a path
    const sourceStream = sourceHandle.createReadStream({ autoClose: true });

    const ps = new ProgressStream({ interval: 10000 }); // display a progress every 10 seconds
    ps.on('progress', function (progress) {
        const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
        if (!transferred && !speed) return progressCallback({ message: `Uploading ${sourceFile}` }); // 0M@0MBps looks wrong
        progressCallback({ message: `Uploading ${sourceFile}: ${transferred}M@${speed}MBps` });
    });

    const streams = encryption
        ? [ sourceStream, new EncryptStream(encryption), ps, uploader.stream ]
        : [ sourceStream, ps, uploader.stream ];

    // safe() resolves to [error, result] and never rejects; wrapping that resolved tuple in
    // a second safe() (as before) made 'error' always null and silently swallowed pipeline failures
    const [error] = await safe(stream.pipeline(streams));
    if (error) {
        if (error.message.includes('ENOENT')) return; // ignore error if file disappears mid-upload
        throw new BoxError(BoxError.EXTERNAL_ERROR, `upload pipeline error: ${error.message}`);
    }

    // debug(`addFile: pipeline finished: ${JSON.stringify(ps.stats())}`);
    await uploader.finish();
}
// Mirrors the local data layout to remote storage by replaying the syncer's
// task stream (add / remove / removedir) against the storage provider API.
async function sync(backupConfig, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    // the number here has to take into account the s3.upload partSize (which is 10MB). So 20=200MB
    const concurrency = backupConfig.limits?.syncConcurrency || (backupConfig.provider === 's3' ? 20 : 10);

    await syncer.sync(dataLayout, async function processTask(task) {
        debug('sync: processing task: %j', task);

        // the empty task.path is special to signify the directory
        const destPath = task.path && backupConfig.encryptedFilenames ? hush.encryptFilePath(task.path, backupConfig.encryption) : task.path;
        const backupFilePath = path.join(getBackupFilePath(backupConfig, remotePath), destPath);

        switch (task.operation) {
        case 'removedir':
            debug(`Removing directory ${backupFilePath}`);
            await storage.api(backupConfig.provider).removeDir(backupConfig, backupFilePath, progressCallback);
            break;
        case 'remove':
            debug(`Removing ${backupFilePath}`);
            await storage.api(backupConfig.provider).remove(backupConfig, backupFilePath);
            break;
        case 'add':
            await promiseRetry({ times: 5, interval: 20000, debug }, async (retryCount) => {
                progressCallback({ message: `Adding ${task.path}` + (retryCount > 1 ? ` (Try ${retryCount})` : '') });
                debug(`Adding ${task.path} position ${task.position} try ${retryCount}`);
                const uploader = await storage.api(backupConfig.provider).upload(backupConfig, backupFilePath);
                await addFile(dataLayout.toLocalPath('./' + task.path), backupConfig.encryption, uploader, progressCallback);
            });
            break;
        }
    }, concurrency);
}
// this is not part of 'snapshotting' because we need root access to traverse
// Records filesystem metadata (empty dirs, executable bits, symlink targets) that a
// per-file upload cannot carry, and writes it as JSON to metadataFile.
async function saveFsMetadata(dataLayout, metadataFile) {
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof metadataFile, 'string');

    // contains paths prefixed with './'
    const metadata = {
        emptyDirs: [],
        execFiles: [],
        symlinks: []
    };

    // we assume small number of files. spawnSync will raise a ENOBUFS error after maxBuffer
    for (const lp of dataLayout.localPaths()) {
        const emptyDirs = await shell.exec('saveFsMetadata', `find ${lp} -type d -empty`, { maxBuffer: 1024 * 1024 * 80 });
        if (emptyDirs.length) metadata.emptyDirs = metadata.emptyDirs.concat(emptyDirs.trim().split('\n').map((ed) => dataLayout.toRemotePath(ed)));

        const execFiles = await shell.exec('saveFsMetadata', `find ${lp} -type f -executable`, { maxBuffer: 1024 * 1024 * 80 });
        if (execFiles.length) metadata.execFiles = metadata.execFiles.concat(execFiles.trim().split('\n').map((ef) => dataLayout.toRemotePath(ef)));

        // tag was misspelled 'safeFsMetadata'; use the same tag as the sibling shell calls
        const symlinkFiles = await shell.exec('saveFsMetadata', `find ${lp} -type l`, { maxBuffer: 1024 * 1024 * 30 });
        if (symlinkFiles.length) metadata.symlinks = metadata.symlinks.concat(symlinkFiles.trim().split('\n').map((sl) => {
            const target = safe.fs.readlinkSync(sl);
            return { path: dataLayout.toRemotePath(sl), target };
        }));
    }

    if (!safe.fs.writeFileSync(metadataFile, JSON.stringify(metadata, null, 4))) throw new BoxError(BoxError.FS_ERROR, `Error writing fs metadata: ${safe.error.message}`);
}
// Re-applies the filesystem metadata recorded by saveFsMetadata(): recreates empty
// directories, restores executable bits and recreates symlinks inside the data layout.
async function restoreFsMetadata(dataLayout, metadataFile) {
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof metadataFile, 'string');

    debug(`Recreating empty directories in ${dataLayout.toString()}`);

    const rawJson = safe.fs.readFileSync(metadataFile, 'utf8');
    if (rawJson === null) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Error loading fsmetadata.json:' + safe.error.message);

    const metadata = safe.JSON.parse(rawJson);
    if (metadata === null) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Error parsing fsmetadata.json:' + safe.error.message);

    for (const dir of metadata.emptyDirs) {
        const [mkdirError] = await safe(fs.promises.mkdir(dataLayout.toLocalPath(dir), { recursive: true }));
        if (mkdirError) throw new BoxError(BoxError.FS_ERROR, `unable to create path: ${mkdirError.message}`);
    }

    for (const file of metadata.execFiles) {
        const [chmodError] = await safe(fs.promises.chmod(dataLayout.toLocalPath(file), 0o755));
        if (chmodError) throw new BoxError(BoxError.FS_ERROR, `unable to chmod: ${chmodError.message}`);
    }

    for (const link of (metadata.symlinks || [])) {
        if (!link.target) continue;

        const localLinkPath = dataLayout.toLocalPath(link.path);

        // the path may not exist if we had a directory full of symlinks
        const [mkdirError] = await safe(fs.promises.mkdir(path.dirname(localLinkPath), { recursive: true }));
        if (mkdirError) throw new BoxError(BoxError.FS_ERROR, `unable to symlink (mkdir): ${mkdirError.message}`);

        const [symlinkError] = await safe(fs.promises.symlink(link.target, localLinkPath, 'file'));
        if (symlinkError) throw new BoxError(BoxError.FS_ERROR, `unable to symlink: ${symlinkError.message}`);
    }
}
// Downloads every remote file under backupFilePath into the local data layout.
// Callback style because the storage listDir() API drives a paged iterator.
function downloadDir(backupConfig, backupFilePath, dataLayout, progressCallback, callback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');
    assert.strictEqual(typeof callback, 'function');

    debug(`downloadDir: ${backupFilePath} to ${dataLayout.toString()}`);

    async function downloadFile(entry) {
        let relativePath = path.relative(backupFilePath, entry.fullPath);
        if (backupConfig.encryptedFilenames) {
            const { error, result } = hush.decryptFilePath(relativePath, backupConfig.encryption);
            if (error) throw new BoxError(BoxError.CRYPTO_ERROR, 'Unable to decrypt file');
            relativePath = result;
        }
        const destFilePath = dataLayout.toLocalPath('./' + relativePath);

        const [mkdirError] = await safe(fs.promises.mkdir(path.dirname(destFilePath), { recursive: true }));
        if (mkdirError) throw new BoxError(BoxError.FS_ERROR, mkdirError.message);

        await promiseRetry({ times: 5, interval: 20000 }, async function () {
            const [downloadError, sourceStream] = await safe(storage.api(backupConfig.provider).download(backupConfig, entry.fullPath));
            if (downloadError) {
                progressCallback({ message: `Download ${entry.fullPath} to ${destFilePath} errored: ${downloadError.message}` });
                throw downloadError; // promiseRetry will re-attempt
            }

            const ps = new ProgressStream({ interval: 10000 }); // display a progress every 10 seconds
            ps.on('progress', (progress) => {
                const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
                if (!transferred && !speed) return progressCallback({ message: `Downloading ${entry.fullPath}` }); // 0M@0MBps looks wrong
                progressCallback({ message: `Downloading ${entry.fullPath}: ${transferred}M@${speed}MBps` });
            });

            // pipeline order: source -> progress [-> decrypt] -> destination file
            const streams = [ sourceStream, ps ];
            if (backupConfig.encryption) streams.push(new DecryptStream(backupConfig.encryption));
            streams.push(fs.createWriteStream(destFilePath));

            progressCallback({ message: `Downloading ${entry.fullPath} to ${destFilePath}` });

            const [pipelineError] = await safe(stream.pipeline(streams));
            if (pipelineError) {
                progressCallback({ message: `Download error ${entry.fullPath} to ${destFilePath}: ${pipelineError.message}` });
                throw pipelineError;
            }

            progressCallback({ message: `Download finished ${entry.fullPath} to ${destFilePath}` });
        });
    }

    storage.api(backupConfig.provider).listDir(backupConfig, backupFilePath, 1000, function (entries, iteratorDone) {
        // https://www.digitalocean.com/community/questions/rate-limiting-on-spaces?answer=40441
        const concurrency = backupConfig.limits?.downloadConcurrency || (backupConfig.provider === 's3' ? 30 : 10);
        async.eachLimit(entries, concurrency, downloadFile, iteratorDone);
    }, callback);
}
// Downloads a full backup: all files via downloadDir(), then re-applies the recorded
// filesystem metadata (empty dirs, exec bits, symlinks) from fsmetadata.json.
async function download(backupConfig, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    const backupFilePath = getBackupFilePath(backupConfig, remotePath);

    debug(`download: Downloading ${backupFilePath} to ${dataLayout.toString()}`);

    await util.promisify(downloadDir)(backupConfig, backupFilePath, dataLayout, progressCallback);
    await restoreFsMetadata(dataLayout, `${dataLayout.localRoot()}/fsmetadata.json`);
}
// Uploads a full backup: first persists the filesystem metadata that per-file sync
// cannot carry (empty dirs, exec bits, symlinks), then syncs all files to storage.
async function upload(backupConfig, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout'); // was a weaker typeof check; match download()/sync()
    assert.strictEqual(typeof progressCallback, 'function');

    await saveFsMetadata(dataLayout, `${dataLayout.localRoot()}/fsmetadata.json`);
    await sync(backupConfig, remotePath, dataLayout, progressCallback);
}