tgz: extract using tar-stream directly

we used to have a fork of tar-fs. Using tar-stream directly gives us
more control.
This commit is contained in:
Girish Ramakrishnan
2024-07-05 17:53:35 +02:00
parent dd9e6e63ad
commit 1dc6b40a68
8 changed files with 305 additions and 244 deletions

View File

@@ -34,6 +34,7 @@ const assert = require('assert'),
constants = require('../constants.js'),
debug = require('debug')('box:storage/s3'),
https = require('https'),
{ PassThrough } = require('node:stream'),
path = require('path'),
Readable = require('stream').Readable,
safe = require('safetydance'),
@@ -103,20 +104,11 @@ async function getAvailableSize(apiConfig) {
return Number.POSITIVE_INFINITY;
}
function upload(apiConfig, backupFilePath, sourceStream, callback) {
async function upload(apiConfig, backupFilePath) {
assert.strictEqual(typeof apiConfig, 'object');
assert.strictEqual(typeof backupFilePath, 'string');
assert.strictEqual(typeof sourceStream, 'object');
assert.strictEqual(typeof callback, 'function');
const credentials = getS3Config(apiConfig);
const params = {
Bucket: apiConfig.bucket,
Key: backupFilePath,
Body: sourceStream
};
const s3 = new aws.S3(credentials);
// s3.upload automatically does a multi-part upload. we set queueSize to 3 to reduce memory usage
@@ -125,16 +117,26 @@ function upload(apiConfig, backupFilePath, sourceStream, callback) {
// s3: https://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html (max 10k parts and no size limit on the last part!)
const partSize = apiConfig.limits?.uploadPartSize || (apiConfig.provider === 'scaleway-objectstorage' ? 100 * 1024 * 1024 : 10 * 1024 * 1024);
s3.upload(params, { partSize, queueSize: 3 }, function (error, data) {
if (error) {
debug(`upload: [${backupFilePath}] s3 upload error. %o`, error);
return callback(new BoxError(BoxError.EXTERNAL_ERROR, `Error uploading ${backupFilePath}. Message: ${error.message} HTTP Code: ${error.code}`));
const passThrough = new PassThrough();
const params = {
Bucket: apiConfig.bucket,
Key: backupFilePath,
Body: passThrough
};
const managedUpload = s3.upload(params, { partSize, queueSize: 3 });
managedUpload.on('httpUploadProgress', (progress) => debug(`Upload progress: ${JSON.stringify(progress)}`));
const uploadPromise = managedUpload.promise();
return {
stream: passThrough,
async finish() {
const [error, data] = await safe(uploadPromise);
if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Upload error: ${error.message}`);
debug(`Upload finished. ${JSON.stringify(data)}`);
}
debug(`Uploaded ${backupFilePath} with partSize ${partSize}: ${JSON.stringify(data)}`);
callback(null);
});
};
}
async function exists(apiConfig, backupFilePath) {
@@ -487,7 +489,6 @@ async function removeDir(apiConfig, pathPrefix, progressCallback) {
assert.strictEqual(typeof pathPrefix, 'string');
assert.strictEqual(typeof progressCallback, 'function');
const credentials = getS3Config(apiConfig);
const s3 = new aws.S3(credentials);
const listDirAsync = util.promisify(listDir);