2022-04-28 18:43:14 -07:00
|
|
|
'use strict';
|
|
|
|
|
|
2025-08-14 11:17:38 +05:30
|
|
|
const assert = require('node:assert'),
|
2025-08-01 14:54:32 +02:00
|
|
|
backupTargets = require('../backuptargets.js'),
|
2022-04-28 18:43:14 -07:00
|
|
|
BoxError = require('../boxerror.js'),
|
|
|
|
|
DataLayout = require('../datalayout.js'),
|
|
|
|
|
debug = require('debug')('box:backupformat/tgz'),
|
|
|
|
|
{ DecryptStream, EncryptStream } = require('../hush.js'),
|
2025-08-14 11:17:38 +05:30
|
|
|
fs = require('node:fs'),
|
2025-08-13 18:38:56 +05:30
|
|
|
HashStream = require('../hash-stream.js'),
|
2025-08-14 11:17:38 +05:30
|
|
|
path = require('node:path'),
|
2022-11-06 10:17:14 +01:00
|
|
|
ProgressStream = require('../progress-stream.js'),
|
2023-07-25 09:56:58 +05:30
|
|
|
promiseRetry = require('../promise-retry.js'),
|
2024-07-05 17:53:35 +02:00
|
|
|
safe = require('safetydance'),
|
|
|
|
|
stream = require('stream/promises'),
|
2024-07-18 15:13:38 +02:00
|
|
|
{ Transform } = require('node:stream'),
|
2024-07-05 17:53:35 +02:00
|
|
|
tar = require('tar-stream'),
|
2025-08-14 11:17:38 +05:30
|
|
|
zlib = require('node:zlib');
|
2022-04-28 18:43:14 -07:00
|
|
|
|
2024-07-18 15:13:38 +02:00
|
|
|
// In tar, the entry header contains the file size. If we don't provide exactly that many bytes, the tar becomes corrupt.
// Linux provides no guarantee of how many bytes can be read from a file. This is the case with sqlite and log files
// which are accessed by other processes while tar is in action. This class handles both cases:
//   overflow  - bytes beyond the declared size are dropped
//   underflow - the missing bytes are zero-filled when the input ends
class EnsureFileSizeStream extends Transform {
    // options are passed on to Transform. In addition:
    //   options.size - number of bytes this stream must emit
    //   options.name - label used in debug messages only
    constructor(options) {
        super(options);
        this._remaining = options.size; // bytes still owed to the consumer
        this._name = options.name;
    }

    // Forwards at most the bytes still owed; anything beyond that is discarded.
    _transform(chunk, encoding, callback) {
        if (this._remaining <= 0) {
            debug(`EnsureFileSizeStream: ${this._name} dropping ${chunk.length} bytes`);
            return callback(null);
        }

        if (this._remaining - chunk.length < 0) {
            debug(`EnsureFileSizeStream: ${this._name} dropping extra ${chunk.length - this._remaining} bytes`);
            chunk = chunk.subarray(0, this._remaining);
            this._remaining = 0;
        } else {
            this._remaining -= chunk.length;
        }

        callback(null, chunk);
    }

    // Zero-fills whatever is still owed once the input has ended.
    _flush(callback) {
        if (this._remaining > 0) {
            debug(`EnsureFileSizeStream: ${this._name} injecting ${this._remaining} bytes`);

            // Pad in bounded chunks instead of allocating the whole shortfall at once: a file that got truncated
            // by gigabytes after it was stat'ed would otherwise require one giant Buffer.
            // The same zero block is pushed repeatedly, so consumers must not modify chunks in place.
            const PAD_CHUNK_SIZE = 64 * 1024;
            const zeros = Buffer.alloc(Math.min(this._remaining, PAD_CHUNK_SIZE), 0);

            while (this._remaining > 0) {
                const count = Math.min(this._remaining, zeros.length);
                this.push(count === zeros.length ? zeros : zeros.subarray(0, count));
                this._remaining -= count;
            }
        }

        callback();
    }
}
|
|
|
|
|
|
2024-07-18 13:40:36 +02:00
|
|
|
// Adds one entry to the tar pack.
//   pack    - object providing entry(header, callback) which returns the writable for the entry body
//   header  - tar header of the entry (name, type, size, ...)
//   options - { input }: optional readable with the entry body. It is forced to header.size bytes.
// Returns a promise that resolves when the pack.entry callback reports success. It rejects with a
// BoxError.FS_ERROR when the entry cannot be created, when the callback reports an error or when
// streaming the body fails.
function addEntryToPack(pack, header, options) {
    assert.strictEqual(typeof pack, 'object');
    assert.strictEqual(typeof header, 'object');
    assert.strictEqual(typeof options, 'object'); // { input }

    const input = options?.input;

    return new Promise((resolve, reject) => {
        let packEntry = null, entryError = null;

        try {
            packEntry = pack.entry(header, function (error) {
                if (error) {
                    debug(`addToPack: error adding ${header.name} ${header.type} ${error.message}`);
                    reject(new BoxError(BoxError.FS_ERROR, error.message));
                } else {
                    debug(`addToPack: added ${header.name} ${header.type}`);
                    resolve();
                }
            });
        } catch (error) {
            entryError = error;
        }

        if (!packEntry) {
            // nobody is going to consume the input anymore. destroy it so that the underlying resource gets released
            if (typeof input?.destroy === 'function') input.destroy();

            // pack.entry may also return nothing without throwing, there is no error object in that case
            const reason = entryError ? entryError.message : 'pack did not create an entry stream';
            return reject(new BoxError(BoxError.FS_ERROR, `Failed to add ${header.name}: ${reason}`));
        }

        if (!input) return;

        const ensureFileSizeStream = new EnsureFileSizeStream({ name: header.name, size: header.size });

        // runs in the background. success is reported by the pack.entry callback above. a failure also rejects
        // here so that the caller does not wait forever if that callback never fires. settling twice is a no-op
        stream.pipeline(input, ensureFileSizeStream, packEntry).catch((error) => {
            debug(`addToPack: pipeline error adding ${header.name} ${header.type} ${error.message}`);
            reject(new BoxError(BoxError.FS_ERROR, error.message));
        });
    });
}
|
|
|
|
|
|
2024-07-18 13:40:36 +02:00
|
|
|
// Walks the tree below localPath and adds every file, directory and symlink to the tar pack.
// localPath itself gets no entry, only what is found inside it. Entry names come from dataLayout.toRemotePath().
// Directories that cannot be listed and files/links that cannot be opened, stat'ed or read are logged and skipped.
// Other entry types are ignored. A failure to add an entry to the pack is thrown to the caller.
// Returns { fileCount, linkCount, dirCount } of the entries that were added.
async function addPathToPack(pack, localPath, dataLayout) {
    assert.strictEqual(typeof pack, 'object');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof localPath, 'string');

    const stats = { fileCount: 0, linkCount: 0, dirCount: 0 };

    const queue = [ localPath ];
    while (queue.length) {
        const dir = queue.shift();
        debug(`tarPack: processing ${dir}`);

        const [readdirError, entries] = await safe(fs.promises.readdir(dir, { withFileTypes: true }));
        if (!entries) {
            debug(`tarPack: skipping directory ${dir}: ${readdirError.message}`);
            continue;
        }

        const subdirs = [];
        for (const entry of entries) {
            const abspath = path.join(dir, entry.name);
            const headerName = dataLayout.toRemotePath(abspath);

            if (entry.isFile()) {
                const [openError, handle] = await safe(fs.promises.open(abspath, 'r'));
                if (!handle) { debug(`tarPack: skipping file, could not open ${abspath}: ${openError.message}`); continue; }

                const [statError, stat] = await safe(handle.stat());
                if (!stat) {
                    debug(`tarPack: skipping file, could not stat ${abspath}: ${statError.message}`);
                    await safe(handle.close()); // the handle is still open at this point, do not leak it
                    continue;
                }

                const header = { name: headerName, type: 'file', mode: stat.mode, size: stat.size, uid: process.getuid(), gid: process.getgid() };
                if (stat.size > 8589934590 || entry.name.length > 99) header.pax = { size: stat.size };

                const input = handle.createReadStream({ autoClose: true });
                try {
                    await addEntryToPack(pack, header, { input });
                } catch (error) {
                    input.destroy(); // releases the handle if the stream was never consumed. no-op if already destroyed
                    throw error;
                }
                ++stats.fileCount;
            } else if (entry.isDirectory()) {
                const header = { name: headerName, type: 'directory', uid: process.getuid(), gid: process.getgid() };
                subdirs.push(abspath);
                await addEntryToPack(pack, header, { /* options */ });
                ++stats.dirCount;
            } else if (entry.isSymbolicLink()) {
                const [readlinkError, target] = await safe(fs.promises.readlink(abspath));
                if (!target) { debug(`tarPack: skipping link, could not readlink ${abspath}: ${readlinkError.message}`); continue; }

                const header = { name: headerName, type: 'symlink', linkname: target, uid: process.getuid(), gid: process.getgid() };
                await addEntryToPack(pack, header, { /* options */ });
                ++stats.linkCount;
            } else {
                debug(`tarPack: ignoring unknown type ${entry.name} ${entry.type}`);
            }
        }

        queue.unshift(...subdirs); // add to front of queue and in order of readdir listing
    }

    return stats;
}
|
|
|
|
|
|
2024-07-05 17:53:35 +02:00
|
|
|
// Packs all local paths of dataLayout into a gzipped (and optionally encrypted) tarball and streams it to the uploader.
//   dataLayout       - DataLayout providing localPaths()
//   encryption       - encryption config passed to EncryptStream, or null for no encryption
//   uploader         - { stream, finish }: writable receiving the tarball and the async function completing the upload
//   progressCallback - called with { message } on every progress event
// Returns { stats, integrity }. Throws BoxError.EXTERNAL_ERROR when packing or the pipeline fails.
async function tarPack(dataLayout, encryption, uploader, progressCallback) {
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof encryption, 'object');
    assert.strictEqual(typeof uploader, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const gzip = zlib.createGzip({});
    const ps = new ProgressStream({ interval: 10000 }); // emit 'progress' every 10 seconds
    ps.on('progress', function (progress) {
        const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
        if (!transferred && !speed) return progressCallback({ message: 'Uploading backup' }); // 0M@0MBps looks wrong
        progressCallback({ message: `Uploading backup ${transferred}M@${speed}MBps` });
    });

    const pack = tar.pack();
    const hash = new HashStream();

    // progress and hash are measured on the final (compressed and possibly encrypted) bytes
    const streams = encryption
        ? [ pack, gzip, new EncryptStream(encryption), ps, hash, uploader.stream ]
        : [ pack, gzip, ps, hash, uploader.stream ];
    const pipeline = safe(stream.pipeline(...streams)); // runs in the background while entries get added below

    let fileCount = 0, packError = null;
    for (const localPath of dataLayout.localPaths()) {
        const [error, stats] = await safe(addPathToPack(pack, localPath, dataLayout), { debug });
        if (error) { packError = error; break; }
        fileCount += stats.fileCount;
    }

    if (packError) {
        // fail the pipeline explicitly. finalizing here would upload a truncated archive as if it were complete
        // when the pack itself is not in an error state. no-op if the pack is already destroyed
        pack.destroy(packError);
    } else {
        pack.finalize();
    }

    const [error] = await pipeline; // already wrapped in safe()
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `tarPack pipeline error: ${error.message}`);
    debug(`tarPack: pipeline finished: ${JSON.stringify(ps.stats())}`);

    await uploader.finish();

    return {
        stats: { fileCount, ...ps.stats() },
        integrity: { size: ps.stats().transferred, fileCount, sha256: hash.digest('hex') }
    };
}
|
|
|
|
|
|
2024-07-05 17:53:35 +02:00
|
|
|
// Extracts a gzipped (and optionally encrypted) tarball read from inStream into the local paths given by dataLayout.
//   inStream         - readable providing the tarball
//   dataLayout       - DataLayout providing toLocalPath() for entry names
//   encryption       - encryption config passed to DecryptStream, or null if the tarball is not encrypted
//   progressCallback - called with { message } on every progress event
// Directories, files and symlinks are restored. Entries with an absolute name or of any other type are skipped.
// Throws BoxError.EXTERNAL_ERROR when the pipeline fails, which includes errors restoring an entry.
async function tarExtract(inStream, dataLayout, encryption, progressCallback) {
    assert.strictEqual(typeof inStream, 'object');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof encryption, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const extract = tar.extract();
    const now = new Date();
    extract.on('entry', async function (header, entryStream, next) {
        if (path.isAbsolute(header.name)) {
            debug(`tarExtract: ignoring absolute path ${header.name}`);
            entryStream.resume(); // drain. the body of a skipped entry still has to be consumed
            return next();
        }
        const abspath = dataLayout.toLocalPath(header.name);
        debug(`tarExtract: ${header.name} ${header.size} ${header.type} to ${abspath}`);
        let error = null;
        if (header.type === 'directory') {
            [error] = await safe(fs.promises.mkdir(abspath, { recursive: true, mode: 0o755 }));
        } else if (header.type === 'file') {
            const output = fs.createWriteStream(abspath);
            [error] = await safe(stream.pipeline(entryStream, output));
            if (!error) [error] = await safe(fs.promises.chmod(abspath, header.mode));
        } else if (header.type === 'symlink') {
            await safe(fs.promises.unlink(abspath)); // remove any link created from previous failed extract
            [error] = await safe(fs.promises.symlink(header.linkname, abspath));
        } else {
            debug(`tarExtract: ignoring unknown entry: ${header.name} ${header.type}`);
            entryStream.resume(); // drain
            return next(); // nothing was created at abspath, so there are no timestamps to restore
        }

        if (error) return next(error);

        [error] = await safe(fs.promises.lutimes(abspath, now /* atime */, header.mtime)); // for dirs, mtime will get overwritten
        next(error);
    });
    extract.on('finish', () => debug('tarExtract: extract finished'));

    const gunzip = zlib.createGunzip({});
    const ps = new ProgressStream({ interval: 10000 });
    ps.on('progress', function (progress) {
        const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
        if (!transferred && !speed) return progressCallback({ message: 'Downloading backup' }); // 0M@0MBps looks wrong
        progressCallback({ message: `Downloading ${transferred}M@${speed}MBps` });
    });

    // progress is measured on the raw downloaded bytes, before decryption and decompression
    const streams = encryption
        ? [ inStream, ps, new DecryptStream(encryption), gunzip, extract ]
        : [ inStream, ps, gunzip, extract ];

    const [error] = await safe(stream.pipeline(...streams));
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `tarExtract pipeline error: ${error.message}`);

    debug(`tarExtract: pipeline finished: ${JSON.stringify(ps.stats())}`);
}
|
|
|
|
|
|
2025-07-24 19:02:02 +02:00
|
|
|
// Downloads the tarball at remotePath from the backup target and extracts it into dataLayout.
// A failed attempt is retried through promiseRetry (5 times, 20000 interval), each attempt starting a fresh download.
async function download(backupTarget, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    debug(`download: Downloading ${remotePath} to ${dataLayout.toString()}`);

    const retryOptions = { times: 5, interval: 20000, debug };

    // a single attempt: open the remote stream and unpack it
    const attempt = async () => {
        progressCallback({ message: `Downloading backup ${remotePath}` });

        const storage = backupTargets.storageApi(backupTarget);
        const sourceStream = await storage.download(backupTarget.config, remotePath);
        await tarExtract(sourceStream, dataLayout, backupTarget.encryption, progressCallback);
    };

    await promiseRetry(retryOptions, attempt);
}
|
|
|
|
|
|
2025-07-24 19:02:02 +02:00
|
|
|
// Packs dataLayout into a tarball and uploads it to remotePath of the backup target.
// A failed attempt is retried through promiseRetry (5 times, 20000 interval), each attempt starting a fresh upload.
// Returns { stats, integrityMap }. integrityMap is a Map with a single entry: the basename of remotePath mapped to
// the integrity info reported by tarPack.
async function upload(backupTarget, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    // same check as download() and tarPack(): reject a bad layout here instead of inside the retry loop
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    debug(`upload: Uploading ${dataLayout.toString()} to ${remotePath}`);

    return await promiseRetry({ times: 5, interval: 20000, debug }, async () => {
        progressCallback({ message: `Uploading backup ${remotePath}` });

        const uploader = await backupTargets.storageApi(backupTarget).upload(backupTarget.config, remotePath);
        const { stats, integrity } = await tarPack(dataLayout, backupTarget.encryption, uploader, progressCallback);

        const integrityMap = new Map([ [path.basename(remotePath), integrity] ]);
        return { stats, integrityMap };
    });
}
|
2024-07-18 15:39:45 +02:00
|
|
|
|
2025-08-01 22:58:19 +02:00
|
|
|
// Returns the file name extension of a tgz backup, depending on whether the backup is encrypted.
function getFileExtension(encryption) {
    assert.strictEqual(typeof encryption, 'boolean');

    if (encryption) return '.tar.gz.enc';

    return '.tar.gz';
}
|
|
|
|
|
|
2024-07-18 15:39:45 +02:00
|
|
|
exports = module.exports = {
|
|
|
|
|
download,
|
|
|
|
|
upload,
|
2025-08-01 22:58:19 +02:00
|
|
|
getFileExtension,
|
2024-07-18 15:39:45 +02:00
|
|
|
|
|
|
|
|
// exported for testing
|
|
|
|
|
_EnsureFileSizeStream: EnsureFileSizeStream
|
|
|
|
|
};
|