tgz: extract using tar-stream directly

we used to have a fork of tar-fs. using tar-stream directly gives us
more control
Girish Ramakrishnan
2024-07-05 17:53:35 +02:00
parent dd9e6e63ad
commit 1dc6b40a68
8 changed files with 305 additions and 244 deletions
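
For context on the title: tar-fs extracts a whole archive to a directory in one call, while tar-stream hands you one 'entry' event per tar member, which is where the extra control comes from. The sketch below is illustrative only, not code from this commit; extractTarGz is a hypothetical name, and a real implementation must also sanitize header.name against path traversal and handle symlinks, modes and mtimes.

const fs = require('fs'),
    path = require('path'),
    tar = require('tar-stream'),
    zlib = require('zlib'),
    { pipeline } = require('stream/promises');

// Illustrative sketch only; not from this commit.
async function extractTarGz(sourceStream, destDir) {
    const extract = tar.extract();

    extract.on('entry', function (header, stream, next) {
        const target = path.join(destDir, header.name); // real code must guard against path traversal here

        if (header.type === 'directory') {
            fs.mkdirSync(target, { recursive: true });
            stream.on('end', next);
            stream.resume(); // directory entries carry no data
        } else { // treat as 'file'; symlinks, modes and mtimes are omitted in this sketch
            fs.mkdirSync(path.dirname(target), { recursive: true });
            stream.pipe(fs.createWriteStream(target)).on('finish', next);
        }
    });

    await pipeline(sourceStream, zlib.createGunzip(), extract);
}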


@@ -81,42 +81,25 @@ function hasChownSupportSync(apiConfig) {
     }
 }
 
-function upload(apiConfig, backupFilePath, sourceStream, callback) {
+async function upload(apiConfig, backupFilePath) {
     assert.strictEqual(typeof apiConfig, 'object');
     assert.strictEqual(typeof backupFilePath, 'string');
-    assert.strictEqual(typeof sourceStream, 'object');
-    assert.strictEqual(typeof callback, 'function');
 
-    fs.mkdir(path.dirname(backupFilePath), { recursive: true }, function (error) {
-        if (error) return callback(new BoxError(BoxError.EXTERNAL_ERROR, error.message));
+    const [mkdirError] = await safe(fs.promises.mkdir(path.dirname(backupFilePath), { recursive: true }));
+    if (mkdirError) throw new BoxError(BoxError.FS_ERROR, `Error creating directory ${backupFilePath}: ${mkdirError.message}`);
 
-        safe.fs.unlinkSync(backupFilePath); // remove any hardlink
+    await safe(fs.promises.unlink(backupFilePath), { debug }); // remove any hardlink
 
-        var fileStream = fs.createWriteStream(backupFilePath);
-
-        // this pattern is required to ensure that the file got created before 'finish'
-        fileStream.on('open', function () {
-            sourceStream.pipe(fileStream);
-        });
-
-        fileStream.on('error', function (error) {
-            debug(`upload: [${backupFilePath}] out stream error. %o`, error);
-            callback(new BoxError(BoxError.EXTERNAL_ERROR, error.message));
-        });
-
-        fileStream.on('finish', function () {
+    return {
+        stream: fs.createWriteStream(backupFilePath, { autoClose: true }),
+        async finish() {
             const backupUid = parseInt(process.env.SUDO_UID, 10) || process.getuid(); // in test, upload() may or may not be called via sudo script
             if (hasChownSupportSync(apiConfig)) {
-                if (!safe.fs.chownSync(backupFilePath, backupUid, backupUid)) return callback(new BoxError(BoxError.EXTERNAL_ERROR, 'Unable to chown:' + safe.error.message));
-                if (!safe.fs.chownSync(path.dirname(backupFilePath), backupUid, backupUid)) return callback(new BoxError(BoxError.EXTERNAL_ERROR, 'Unable to chown:' + safe.error.message));
+                if (!safe.fs.chownSync(backupFilePath, backupUid, backupUid)) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to chown ${backupFilePath}: ${safe.error.message}`);
+                if (!safe.fs.chownSync(path.dirname(backupFilePath), backupUid, backupUid)) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to chown parentdir ${backupFilePath}: ${safe.error.message}`);
             }
             debug(`upload ${backupFilePath}: done`);
-            callback(null);
-        });
-    });
+        }
+    };
 }
 
 async function download(apiConfig, sourceFilePath) {
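
The shape of the change above repeats across every backend in this diff: upload(apiConfig, backupFilePath, sourceStream, callback) becomes an async upload(apiConfig, backupFilePath) that returns { stream, finish }. A hypothetical caller, not part of this commit, would drive it like this:

// Hypothetical caller; illustrates the new contract only.
const { pipeline } = require('stream/promises');

async function uploadBackup(storage, apiConfig, backupFilePath, tarStream) {
    const { stream, finish } = await storage.upload(apiConfig, backupFilePath);
    await pipeline(tarStream, stream); // push the archive into the backend
    await finish(); // backend-specific completion: chown above, awaiting the multipart upload for S3
}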


@@ -73,29 +73,20 @@ async function getAvailableSize(apiConfig) {
     return Number.POSITIVE_INFINITY;
 }
 
-function upload(apiConfig, backupFilePath, sourceStream, callback) {
+async function upload(apiConfig, backupFilePath) {
     assert.strictEqual(typeof apiConfig, 'object');
     assert.strictEqual(typeof backupFilePath, 'string');
-    assert.strictEqual(typeof sourceStream, 'object');
-    assert.strictEqual(typeof callback, 'function');
 
     debug(`Uploading to ${backupFilePath}`);
 
-    function done(error) {
-        if (error) {
-            debug(`upload: [${backupFilePath}] gcp upload error. %o`, error);
-            return callback(new BoxError(BoxError.EXTERNAL_ERROR, `Error uploading ${backupFilePath}. Message: ${error.message} HTTP Code: ${error.code}`));
-        }
-
-        callback(null);
-    }
-
-    const uploadStream = getBucket(apiConfig).file(backupFilePath)
-        .createWriteStream({resumable: false})
-        .on('finish', done)
-        .on('error', done);
-
-    sourceStream.pipe(uploadStream);
+    const uploadStream = getBucket(apiConfig)
+        .file(backupFilePath)
+        .createWriteStream({ resumable: false });
+
+    return {
+        stream: uploadStream,
+        async finish() {}
+    };
 }
 
 async function exists(apiConfig, backupFilePath) {
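
finish() can be empty here because @google-cloud/storage's createWriteStream() reports completion and failure on the stream itself; the error handling that the old done() callback performed now falls to whoever pipes into the stream. A caller-side sketch, assuming a stream/promises pipeline:

// Inside an async function; hypothetical caller-side handling.
const { pipeline } = require('stream/promises');

const { stream, finish } = await upload(apiConfig, backupFilePath);
await pipeline(sourceStream, stream); // a GCS write error rejects the pipeline
await finish(); // no-op for this backend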


@@ -56,22 +56,19 @@ async function getAvailableSize(apiConfig) {
     return Number.POSITIVE_INFINITY;
 }
 
-function upload(apiConfig, backupFilePath, sourceStream, callback) {
+async function upload(apiConfig, backupFilePath) {
     assert.strictEqual(typeof apiConfig, 'object');
     assert.strictEqual(typeof backupFilePath, 'string');
-    assert.strictEqual(typeof sourceStream, 'object');
-    assert.strictEqual(typeof callback, 'function');
 
-    // Result: none
-    // sourceStream errors are handled upstream
+    // Result: { stream, finish() callback }
 
-    callback(new BoxError(BoxError.NOT_IMPLEMENTED, 'upload is not implemented'));
+    throw new BoxError(BoxError.NOT_IMPLEMENTED, 'upload is not implemented');
 }
 
 async function exists(apiConfig, backupFilePath) {
     assert.strictEqual(typeof apiConfig, 'object');
     assert.strictEqual(typeof backupFilePath, 'string');
 
     // Result: boolean if exists or not
 
     throw new BoxError(BoxError.NOT_IMPLEMENTED, 'exists is not implemented');
 }
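
The // Result comment above is the contract every backend now implements. Purely as an illustration, not part of the commit, the smallest conforming provider could look like:

// Illustrative only: a conforming upload() that discards its input.
const fs = require('fs');

async function upload(apiConfig, backupFilePath) {
    return {
        stream: fs.createWriteStream('/dev/null'), // consume and discard the archive
        async finish() {} // nothing to verify or flush
    };
}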


@@ -34,6 +34,7 @@ const assert = require('assert'),
     constants = require('../constants.js'),
     debug = require('debug')('box:storage/s3'),
     https = require('https'),
+    { PassThrough } = require('node:stream'),
     path = require('path'),
     Readable = require('stream').Readable,
     safe = require('safetydance'),
@@ -103,20 +104,11 @@ async function getAvailableSize(apiConfig) {
     return Number.POSITIVE_INFINITY;
 }
 
-function upload(apiConfig, backupFilePath, sourceStream, callback) {
+async function upload(apiConfig, backupFilePath) {
     assert.strictEqual(typeof apiConfig, 'object');
     assert.strictEqual(typeof backupFilePath, 'string');
-    assert.strictEqual(typeof sourceStream, 'object');
-    assert.strictEqual(typeof callback, 'function');
 
     const credentials = getS3Config(apiConfig);
 
-    const params = {
-        Bucket: apiConfig.bucket,
-        Key: backupFilePath,
-        Body: sourceStream
-    };
-
     const s3 = new aws.S3(credentials);
 
     // s3.upload automatically does a multi-part upload. we set queueSize to 3 to reduce memory usage
@@ -125,16 +117,26 @@ function upload(apiConfig, backupFilePath, sourceStream, callback) {
     // s3: https://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html (max 10k parts and no size limit on the last part!)
     const partSize = apiConfig.limits?.uploadPartSize || (apiConfig.provider === 'scaleway-objectstorage' ? 100 * 1024 * 1024 : 10 * 1024 * 1024);
 
-    s3.upload(params, { partSize, queueSize: 3 }, function (error, data) {
-        if (error) {
-            debug(`upload: [${backupFilePath}] s3 upload error. %o`, error);
-            return callback(new BoxError(BoxError.EXTERNAL_ERROR, `Error uploading ${backupFilePath}. Message: ${error.message} HTTP Code: ${error.code}`));
-        }
-
-        debug(`Uploaded ${backupFilePath} with partSize ${partSize}: ${JSON.stringify(data)}`);
-        callback(null);
-    });
+    const passThrough = new PassThrough();
+
+    const params = {
+        Bucket: apiConfig.bucket,
+        Key: backupFilePath,
+        Body: passThrough
+    };
+
+    const managedUpload = s3.upload(params, { partSize, queueSize: 3 });
+    managedUpload.on('httpUploadProgress', (progress) => debug(`Upload progress: ${JSON.stringify(progress)}`));
+    const uploadPromise = managedUpload.promise();
+
+    return {
+        stream: passThrough,
+        async finish() {
+            const [error, data] = await safe(uploadPromise);
+            if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Upload error: ${error.message}`);
+            debug(`Upload finished. ${JSON.stringify(data)}`);
+        }
+    };
 }
 
 async function exists(apiConfig, backupFilePath) {
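
The PassThrough above exists because aws-sdk v2's s3.upload() requires a Body at call time, while under the new contract the bytes only start flowing after upload() has returned. The bridge pattern in isolation, with hypothetical names:

// Sketch of the bridge pattern above; not a drop-in replacement.
const aws = require('aws-sdk'),
    { PassThrough } = require('node:stream');

function startManagedUpload(s3, bucket, key, partSize) {
    const body = new PassThrough(); // the caller writes into this after we return
    const managedUpload = s3.upload({ Bucket: bucket, Key: key, Body: body }, { partSize, queueSize: 3 });
    return { stream: body, done: managedUpload.promise() }; // await done once the stream has ended
}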
@@ -487,7 +489,6 @@ async function removeDir(apiConfig, pathPrefix, progressCallback) {
     assert.strictEqual(typeof pathPrefix, 'string');
     assert.strictEqual(typeof progressCallback, 'function');
 
     const credentials = getS3Config(apiConfig);
     const s3 = new aws.S3(credentials);
     const listDirAsync = util.promisify(listDir);