split up backupformat logic into separate files

This commit is contained in:
Girish Ramakrishnan
2022-04-28 18:43:14 -07:00
parent c67d9fd082
commit 7123ec433c
9 changed files with 539 additions and 436 deletions

193
src/backupformat/tgz.js Normal file
View File

@@ -0,0 +1,193 @@
'use strict';
exports = module.exports = {
getBackupFilePath,
download,
upload
};
const assert = require('assert'),
async = require('async'),
BoxError = require('../boxerror.js'),
DataLayout = require('../datalayout.js'),
debug = require('debug')('box:backupformat/tgz'),
{ DecryptStream, EncryptStream } = require('../hush.js'),
once = require('../once.js'),
path = require('path'),
progressStream = require('progress-stream'),
storage = require('../storage.js'),
tar = require('tar-fs'),
zlib = require('zlib');
function getBackupFilePath(backupConfig, remotePath) {
assert.strictEqual(typeof backupConfig, 'object');
assert.strictEqual(typeof remotePath, 'string');
const rootPath = storage.api(backupConfig.provider).getRootPath(backupConfig);
const fileType = backupConfig.encryption ? '.tar.gz.enc' : '.tar.gz';
return path.join(rootPath, remotePath + fileType);
}
function tarPack(dataLayout, encryption, callback) {
assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
assert.strictEqual(typeof encryption, 'object');
assert.strictEqual(typeof callback, 'function');
const pack = tar.pack('/', {
dereference: false, // pack the symlink and not what it points to
entries: dataLayout.localPaths(),
ignoreStatError: (path, err) => {
debug(`tarPack: error stat'ing ${path} - ${err.code}`);
return err.code === 'ENOENT'; // ignore if file or dir got removed (probably some temporary file)
},
map: function(header) {
header.name = dataLayout.toRemotePath(header.name);
// the tar pax format allows us to encode filenames > 100 and size > 8GB (see #640)
// https://www.systutorials.com/docs/linux/man/5-star/
if (header.size > 8589934590 || header.name > 99) header.pax = { size: header.size };
return header;
},
strict: false // do not error for unknown types (skip fifo, char/block devices)
});
const gzip = zlib.createGzip({});
const ps = progressStream({ time: 10000 }); // emit 'progress' every 10 seconds
pack.on('error', function (error) {
debug('tarPack: tar stream error.', error);
ps.emit('error', new BoxError(BoxError.EXTERNAL_ERROR, error.message));
});
gzip.on('error', function (error) {
debug('tarPack: gzip stream error.', error);
ps.emit('error', new BoxError(BoxError.EXTERNAL_ERROR, error.message));
});
if (encryption) {
const encryptStream = new EncryptStream(encryption);
encryptStream.on('error', function (error) {
debug('tarPack: encrypt stream error.', error);
ps.emit('error', new BoxError(BoxError.EXTERNAL_ERROR, error.message));
});
pack.pipe(gzip).pipe(encryptStream).pipe(ps);
} else {
pack.pipe(gzip).pipe(ps);
}
return callback(null, ps);
}
function tarExtract(inStream, dataLayout, encryption, callback) {
assert.strictEqual(typeof inStream, 'object');
assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
assert.strictEqual(typeof encryption, 'object');
assert.strictEqual(typeof callback, 'function');
const gunzip = zlib.createGunzip({});
const ps = progressStream({ time: 10000 }); // display a progress every 10 seconds
const extract = tar.extract('/', {
map: function (header) {
header.name = dataLayout.toLocalPath(header.name);
return header;
},
dmode: 500 // ensure directory is writable
});
const emitError = once((error) => {
inStream.destroy();
ps.emit('error', error);
});
inStream.on('error', function (error) {
debug('tarExtract: input stream error.', error);
emitError(new BoxError(BoxError.EXTERNAL_ERROR, error.message));
});
gunzip.on('error', function (error) {
debug('tarExtract: gunzip stream error.', error);
emitError(new BoxError(BoxError.EXTERNAL_ERROR, error.message));
});
extract.on('error', function (error) {
debug('tarExtract: extract stream error.', error);
emitError(new BoxError(BoxError.EXTERNAL_ERROR, error.message));
});
extract.on('finish', function () {
debug('tarExtract: done.');
// we use a separate event because ps is a through2 stream which emits 'finish' event indicating end of inStream and not extract
ps.emit('done');
});
if (encryption) {
let decrypt = new DecryptStream(encryption);
decrypt.on('error', function (error) {
debug('tarExtract: decrypt stream error.', error);
emitError(new BoxError(BoxError.EXTERNAL_ERROR, `Failed to decrypt: ${error.message}`));
});
inStream.pipe(ps).pipe(decrypt).pipe(gunzip).pipe(extract);
} else {
inStream.pipe(ps).pipe(gunzip).pipe(extract);
}
callback(null, ps);
}
function download(backupConfig, remotePath, dataLayout, progressCallback, callback) {
assert.strictEqual(typeof backupConfig, 'object');
assert.strictEqual(typeof remotePath, 'string');
assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
assert.strictEqual(typeof progressCallback, 'function');
assert.strictEqual(typeof callback, 'function');
debug(`download: Downloading ${remotePath} to ${dataLayout.toString()}`);
const backupFilePath = getBackupFilePath(backupConfig, remotePath);
async.retry({ times: 5, interval: 20000 }, function (retryCallback) {
progressCallback({ message: `Downloading backup ${remotePath}` });
storage.api(backupConfig.provider).download(backupConfig, backupFilePath, function (error, sourceStream) {
if (error) return retryCallback(error);
tarExtract(sourceStream, dataLayout, backupConfig.encryption, function (error, ps) {
if (error) return retryCallback(error);
ps.on('progress', function (progress) {
const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
if (!transferred && !speed) return progressCallback({ message: 'Downloading backup' }); // 0M@0MBps looks wrong
progressCallback({ message: `Downloading ${transferred}M@${speed}MBps` });
});
ps.on('error', retryCallback);
ps.on('done', retryCallback);
});
});
}, callback);
}
function upload(backupConfig, remotePath, dataLayout, progressCallback, callback) {
assert.strictEqual(typeof backupConfig, 'object');
assert.strictEqual(typeof remotePath, 'string');
assert.strictEqual(typeof dataLayout, 'object');
assert.strictEqual(typeof progressCallback, 'function');
assert.strictEqual(typeof callback, 'function');
async.retry({ times: 5, interval: 20000 }, function (retryCallback) {
retryCallback = once(retryCallback); // protect again upload() erroring much later after tar stream error
tarPack(dataLayout, backupConfig.encryption, function (error, tarStream) {
if (error) return retryCallback(error);
tarStream.on('progress', function (progress) {
const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
if (!transferred && !speed) return progressCallback({ message: 'Uploading backup' }); // 0M@0MBps looks wrong
progressCallback({ message: `Uploading backup ${transferred}M@${speed}MBps` });
});
tarStream.on('error', retryCallback); // already returns BoxError
storage.api(backupConfig.provider).upload(backupConfig, getBackupFilePath(backupConfig, remotePath), tarStream, retryCallback);
});
}, callback);
}