use node-tar for extract

We will switch our tgz module over to node-tar. The main advantage is that
it is used by npm. Currently, we maintain our own fork of the other module
in order to ignore stat errors.

Unfortunately, I cannot get this to work with the create logic. node-tar doesn't
support path modification - https://github.com/isaacs/node-tar/issues/271
So, I will revert this commit immediately and keep it for future reference.
This commit is contained in:
Girish Ramakrishnan
2024-07-04 14:10:59 +02:00
parent c6f4395578
commit 285feb4f8b
3 changed files with 385 additions and 56 deletions

View File

@@ -16,8 +16,10 @@ const assert = require('assert'),
path = require('path'),
ProgressStream = require('../progress-stream.js'),
promiseRetry = require('../promise-retry.js'),
stream = require('stream/promises'),
storage = require('../storage.js'),
tar = require('tar-fs'),
tar2 = require('tar'),
zlib = require('zlib');
function getBackupFilePath(backupConfig, remotePath) {
@@ -82,56 +84,43 @@ function tarPack(dataLayout, encryption) {
return ps;
}
function tarExtract(inStream, dataLayout, encryption) {
async function tarExtract(inStream, dataLayout, encryption, progressCallback) {
assert.strictEqual(typeof inStream, 'object');
assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
assert.strictEqual(typeof encryption, 'object');
assert.strictEqual(typeof progressCallback, 'function');
const gunzip = zlib.createGunzip({});
const ps = new ProgressStream({ interval: 10000 }); // display a progress every 10 seconds
const extract = tar.extract('/', {
map: function (header) {
header.name = dataLayout.toLocalPath(header.name);
return header;
const extract = tar2.extract({
cwd: '/tmp',
dmode: 500, // ensure directory is writable
preserveOwner: false, // use uid/gid of current process
strict: true, // error on warnings,
preservePaths: true, // allow absolute paths. otherwise will sandbox to cwd
// to map paths, we cannot use transform hook because it is only used for files and not directories
// we can use filter hook as well - https://github.com/isaacs/node-tar/issues/357#issuecomment-1416491212
onReadEntry(entry) {
// debug(entry.header.path, entry.header.type, entry.header.size);
entry.path = dataLayout.toLocalPath(entry.header.path);
},
onwarn(code, message /*, data */) {
debug(`extract warning:${message} ${code}`);
},
dmode: 500 // ensure directory is writable
});
const emitError = once((error) => {
inStream.destroy();
ps.emit('error', error);
});
inStream.on('error', function (error) {
debug('tarExtract: input stream error. %o', error);
emitError(new BoxError(BoxError.EXTERNAL_ERROR, error.message));
});
gunzip.on('error', function (error) {
debug('tarExtract: gunzip stream error. %o', error);
emitError(new BoxError(BoxError.EXTERNAL_ERROR, error.message));
});
extract.on('error', function (error) {
debug('tarExtract: extract stream error. %o', error);
emitError(new BoxError(BoxError.EXTERNAL_ERROR, error.message));
});
extract.on('finish', function () {
debug('tarExtract: done.');
// we use a separate event because ps is a through2 stream which emits 'finish' event indicating end of inStream and not extract
ps.emit('done');
ps.on('progress', function (progress) {
const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
if (!transferred && !speed) return progressCallback({ message: 'Downloading backup' }); // 0M@0MBps looks wrong
progressCallback({ message: `Downloading ${transferred}M@${speed}MBps` });
});
if (encryption) {
const decrypt = new DecryptStream(encryption);
decrypt.on('error', function (error) {
debug('tarExtract: decrypt stream error.', error);
emitError(new BoxError(BoxError.EXTERNAL_ERROR, `Failed to decrypt: ${error.message}`));
});
inStream.pipe(ps).pipe(decrypt).pipe(gunzip).pipe(extract);
await stream.pipeline(inStream, ps, decrypt, extract);
} else {
inStream.pipe(ps).pipe(gunzip).pipe(extract);
await stream.pipeline(inStream, ps, extract);
}
return ps;
@@ -149,19 +138,8 @@ async function download(backupConfig, remotePath, dataLayout, progressCallback)
await promiseRetry({ times: 5, interval: 20000, debug }, async () => {
progressCallback({ message: `Downloading backup ${backupFilePath}` });
const sourceStream = await storage.api(backupConfig.provider).download(backupConfig, backupFilePath);
const ps = tarExtract(sourceStream, dataLayout, backupConfig.encryption);
return await new Promise((resolve, reject) => {
ps.on('progress', function (progress) {
const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
if (!transferred && !speed) return progressCallback({ message: 'Downloading backup' }); // 0M@0MBps looks wrong
progressCallback({ message: `Downloading ${transferred}M@${speed}MBps` });
});
ps.on('error', reject);
ps.on('done', resolve);
});
await tarExtract(sourceStream, dataLayout, backupConfig.encryption, progressCallback);
});
}