Files
cloudron-box/src/storage/s3.js
Girish Ramakrishnan 98b5c77177 s3: add listing check
This is needed for situations like Cloudflare R2, where the endpoint can
be mistakenly configured with the bucket name, e.g. https://xx.r2.cloudflarestorage.com/cloudron-backups .
The upload and del calls work, but list and copy do not.

(cherry picked from commit 093fc98ae5)
2023-01-17 11:18:05 +01:00

598 lines
23 KiB
JavaScript

'use strict';
// Functions exported by this storage module. The underscore-prefixed entries
// expose internals (the AWS mock hooks and the chunk helper).
exports = module.exports = {
getBackupRootPath,
getBackupProviderStatus,
getAvailableSize,
upload,
exists,
download,
copy,
listDir,
remove,
removeDir,
remount,
testConfig,
removePrivateFields,
injectPrivateFields,
// Used to mock AWS
_mockInject: mockInject,
_mockRestore: mockRestore,
_chunk: chunk
};
const assert = require('assert'),
async = require('async'),
AwsSdk = require('aws-sdk'),
BoxError = require('../boxerror.js'),
constants = require('../constants.js'),
debug = require('debug')('box:storage/s3'),
https = require('https'),
path = require('path'),
Readable = require('stream').Readable,
safe = require('safetydance'),
util = require('util'),
_ = require('underscore');
// SDK object used for every `new aws.S3(...)` in this file; mockInject() swaps it out
let aws = AwsSdk;
// test only: holds the value of `aws` from before the last mockInject() call
let originalAWS;
/**
 * Test hook: replaces the SDK object used by this module with `mock`.
 * The value of `aws` at the time of the call is saved so that mockRestore() can put it back.
 *
 * @param {object} mock - replacement object; it is used as `new mock.S3(config)` by the rest of the file
 */
function mockInject(mock) {
originalAWS = aws;
aws = mock;
}
/**
 * Test hook: undoes the last mockInject() by putting the saved SDK object back.
 *
 * Calling this without a preceding mockInject() (or calling it twice) is a no-op.
 * Previously that case assigned `undefined` to `aws`, which made every later
 * `new aws.S3(...)` in this module throw.
 */
function mockRestore() {
    if (originalAWS === undefined) return; // nothing was injected

    aws = originalAWS;
    originalAWS = undefined;
}
/**
 * Tells whether an error object carries one of the "object does not exist" codes.
 *
 * @param {{code: *}} error - error whose `code` property is inspected
 * @returns {boolean} true when code is 'NoSuchKey', 'NotFound' or 'ENOENT'
 */
function S3_NOT_FOUND(error) {
    switch (error.code) {
    case 'NoSuchKey':
    case 'NotFound':
    case 'ENOENT':
        return true;
    default:
        return false;
    }
}
/**
 * Builds the options object handed to `new aws.S3(...)` from a backup config.
 *
 * @param {object} apiConfig - backup configuration (accessKeyId, secretAccessKey, bucket and
 *   optionally region, endpoint, signatureVersion, s3ForcePathStyle, acceptSelfSignedCerts, provider)
 * @returns {object} S3 client options
 */
function getS3Config(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');

    const httpOptions = {
        connectTimeout: 60000, // https://github.com/aws/aws-sdk-js/pull/1446
        timeout: 0 // https://github.com/aws/aws-sdk-js/issues/1704 (allow unlimited time for chunk upload)
    };

    const s3Config = {
        signatureVersion: apiConfig.signatureVersion || 'v4',
        // vhost style unless path style is explicitly requested - https://forums.aws.amazon.com/ann.jspa?annID=6776
        s3ForcePathStyle: apiConfig.s3ForcePathStyle === true,
        accessKeyId: apiConfig.accessKeyId,
        secretAccessKey: apiConfig.secretAccessKey,
        region: apiConfig.region || 'us-east-1',
        maxRetries: 10,
        retryDelayOptions: {
            // constant backoff - https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Config.html#retryDelayOptions-property
            customBackoff: (/* retryCount, error */) => 20000
        },
        httpOptions
    };

    if (apiConfig.endpoint) s3Config.endpoint = apiConfig.endpoint;

    // s3 endpoint names come from the SDK
    const usesHttps = (s3Config.endpoint && s3Config.endpoint.startsWith('https://')) || apiConfig.provider === 's3';

    // only set agent for https calls. otherwise, it crashes
    if (usesHttps && (apiConfig.acceptSelfSignedCerts || apiConfig.bucket.includes('.'))) {
        httpOptions.agent = new https.Agent({ rejectUnauthorized: false });
    }

    return s3Config;
}
// storage api
/**
 * Returns the root path under which backups are stored, which is the configured prefix.
 *
 * @param {object} apiConfig - backup configuration
 * @returns {*} the value of apiConfig.prefix
 */
function getBackupRootPath(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');

    const { prefix } = apiConfig;
    return prefix;
}
/**
 * Reports the provider status. This implementation always reports 'active'.
 *
 * @param {object} apiConfig - backup configuration (only type-checked)
 * @returns {Promise<{state: string}>}
 */
async function getBackupProviderStatus(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');

    const status = { state: 'active' };
    return status;
}
/**
 * Reports the available space. This implementation always reports positive infinity.
 *
 * @param {object} apiConfig - backup configuration (only type-checked)
 * @returns {Promise<number>} Infinity
 */
async function getAvailableSize(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');

    return Infinity;
}
/**
 * Streams `sourceStream` into the bucket under the key `backupFilePath`.
 *
 * @param {object} apiConfig - backup configuration
 * @param {string} backupFilePath - destination object key
 * @param {object} sourceStream - body to upload
 * @param {function} callback - called with a BoxError (EXTERNAL_ERROR) on failure, null on success
 */
function upload(apiConfig, backupFilePath, sourceStream, callback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');
    assert.strictEqual(typeof sourceStream, 'object');
    assert.strictEqual(typeof callback, 'function');

    const s3 = new aws.S3(getS3Config(apiConfig));

    // s3.upload automatically does a multi-part upload. we set queueSize to 3 to reduce memory usage
    // uploader will buffer at most queueSize * partSize bytes into memory at any given time.
    // scaleway only supports 1000 parts per object (https://www.scaleway.com/en/docs/s3-multipart-upload/)
    // s3: https://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html (max 10k parts and no size limit on the last part!)
    let partSize = apiConfig.uploadPartSize;
    if (!partSize) {
        partSize = apiConfig.provider === 'scaleway-objectstorage' ? 100 * 1024 * 1024 : 10 * 1024 * 1024;
    }

    const uploadParams = {
        Bucket: apiConfig.bucket,
        Key: backupFilePath,
        Body: sourceStream
    };
    const uploadOptions = { partSize, queueSize: 3 };

    s3.upload(uploadParams, uploadOptions, (error, data) => {
        if (!error) {
            debug(`Uploaded ${backupFilePath} with partSize ${partSize}: ${JSON.stringify(data)}`);
            return callback(null);
        }

        debug('Error uploading [%s]: s3 upload error.', backupFilePath, error);
        callback(new BoxError(BoxError.EXTERNAL_ERROR, `Error uploading ${backupFilePath}. Message: ${error.message} HTTP Code: ${error.code}`));
    });
}
/**
 * Checks whether a backup file, or anything under a backup directory, exists in the bucket.
 *
 * A path ending in '/' is treated as a directory: it exists when listing that prefix
 * returns at least one object. Any other path is probed with headObject.
 *
 * @param {object} apiConfig - backup configuration
 * @param {string} backupFilePath - object key, or a prefix ending in '/'
 * @returns {Promise<boolean>}
 * @throws {BoxError} EXTERNAL_ERROR when the request fails for a reason other than "not found",
 *   or when a successful HEAD response carries no x-amz-* header (not an S3 endpoint)
 */
async function exists(apiConfig, backupFilePath) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');

    const credentials = getS3Config(apiConfig);
    // the custom retry settings are stripped here, so the SDK defaults apply to these probes
    const s3 = new aws.S3(_.omit(credentials, 'retryDelayOptions', 'maxRetries'));

    if (!backupFilePath.endsWith('/')) { // check for file
        const params = {
            Bucket: apiConfig.bucket,
            Key: backupFilePath
        };

        const [error, data] = await safe(s3.headObject(params).promise());
        if (error && S3_NOT_FOUND(error)) return false;
        if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error headObject ${backupFilePath}. Message: ${error.message} HTTP Code: ${error.code}`);

        // The SDK attaches the raw response to the resolved data as `$response`. The previous code read
        // `this.httpResponse`, which only made sense inside a callback and threw a TypeError here.
        // The check is skipped when no response metadata is attached (e.g. a mocked client).
        const httpResponse = data && data.$response && data.$response.httpResponse;
        const headers = httpResponse && httpResponse.headers;
        if (headers && !Object.keys(headers).some(h => h.startsWith('x-amz'))) throw new BoxError(BoxError.EXTERNAL_ERROR, 'not a s3 endpoint');

        return true;
    }

    // list dir contents
    const listParams = {
        Bucket: apiConfig.bucket,
        Prefix: backupFilePath,
        MaxKeys: 1
    };

    const [error, listData] = await safe(s3.listObjects(listParams).promise());
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error listing objects ${backupFilePath}. Message: ${error.message} HTTP Code: ${error.code}`);

    return listData.Contents.length !== 0;
}
// Download the object in small parts. By downloading small parts, we reduce the chance of sporadic network errors when downloading large objects
// We can retry each part individually, but we haven't had the need for this yet
/**
 * Readable stream that fetches an S3 object piece by piece using ranged getObject calls.
 * The object size is looked up once with headObject; each subsequent read requests the next
 * range of at most `blockSize` bytes until the whole object has been pushed.
 */
class S3MultipartDownloadStream extends Readable {
    /**
     * @param {object} s3 - client exposing headObject(params, cb) and getObject(params, cb)
     * @param {{Bucket: string, Key: string}} params - object to download
     * @param {object} options - Readable options; `blockSize` sets the range size (default 64 MB)
     */
    constructor (s3, params, options) {
        super(options);

        this._s3 = s3;
        this._params = params;
        this._path = `${params.Bucket}/${params.Key}`;
        this._blockSize = options.blockSize || 64 * 1048576; // MB
        this._readSize = 0; // bytes pushed so far
        this._fileSize = -1; // unknown until _fetchSize() succeeds
    }

    // Signals end of stream and resets the read counter.
    _done() {
        this._readSize = 0;
        this.push(null); // EOF
    }

    // Destroys the stream with a BoxError: NOT_FOUND for missing objects, EXTERNAL_ERROR otherwise.
    _handleError(error) {
        if (!S3_NOT_FOUND(error)) {
            debug(`download: ${this._path} s3 stream error.`, error);
            this.destroy(new BoxError(BoxError.EXTERNAL_ERROR, `Error multipartDownload ${this._path}. Message: ${error.message} HTTP Code: ${error.code}`));
            return;
        }

        this.destroy(new BoxError(BoxError.NOT_FOUND, `Backup not found: ${this._path}`));
    }

    // Requests `length` bytes starting at `offset` and pushes the body into the stream.
    _downloadRange(offset, length) {
        const rangeEnd = offset + length - 1;
        const rangeParams = Object.assign({}, this._params, { Range: `bytes=${offset}-${rangeEnd}` });

        this._s3.getObject(rangeParams, (error, data) => {
            if (error) return this._handleError(error);

            const received = parseInt(data.ContentLength, 10);
            if (!(received > 0)) return this._done();

            this._readSize += received;
            this.push(data.Body);
        });
    }

    // Downloads the next block, shortened to whatever remains of the object.
    _nextDownload() {
        const remaining = this._fileSize - this._readSize;
        const wanted = (this._readSize + this._blockSize < this._fileSize) ? this._blockSize : remaining;

        this._downloadRange(this._readSize, wanted);
    }

    // Looks up the object size, then starts the first ranged download.
    _fetchSize() {
        this._s3.headObject(this._params, (error, data) => {
            if (error) return this._handleError(error);

            const size = parseInt(data.ContentLength, 10);
            if (!(size > 0)) return this._done();

            this._fileSize = size;
            this._nextDownload();
        });
    }

    _read() {
        const everythingRead = this._readSize === this._fileSize;
        if (everythingRead) return this._done();

        const nothingReadYet = this._readSize === 0;
        if (nothingReadYet) return this._fetchSize();

        this._nextDownload();
    }
}
/**
 * Hands the caller a readable stream that downloads `backupFilePath` in 64 MB ranges.
 *
 * @param {object} apiConfig - backup configuration
 * @param {string} backupFilePath - object key to download
 * @param {function} callback - called as callback(null, stream)
 */
function download(apiConfig, backupFilePath, callback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');
    assert.strictEqual(typeof callback, 'function');

    const s3 = new aws.S3(getS3Config(apiConfig));
    const objectParams = { Bucket: apiConfig.bucket, Key: backupFilePath };
    const stream = new S3MultipartDownloadStream(s3, objectParams, { blockSize: 64 * 1024 * 1024 });

    return callback(null, stream);
}
/**
 * Lists all objects under the prefix `dir` in batches and feeds each batch to `iteratorCallback`.
 *
 * @param {object} apiConfig - backup configuration
 * @param {string} dir - key prefix to list
 * @param {number} batchSize - maximum number of keys requested per listing call
 * @param {function} iteratorCallback - called as (entries, done) where entries is an array of
 *   { fullPath, size }; the next batch is fetched only after done() is called without error
 * @param {function} callback - called when the listing finished, or with the first error
 */
function listDir(apiConfig, dir, batchSize, iteratorCallback, callback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof dir, 'string');
    assert.strictEqual(typeof batchSize, 'number');
    assert.strictEqual(typeof iteratorCallback, 'function');
    assert.strictEqual(typeof callback, 'function');

    const s3 = new aws.S3(getS3Config(apiConfig));

    const request = {
        Bucket: apiConfig.bucket,
        Prefix: dir,
        MaxKeys: batchSize
    };

    let finished = false;

    const hasMore = (testDone) => testDone(null, !finished);

    function fetchBatch(next) {
        s3.listObjects(request, function (error, listData) {
            if (error) return next(new BoxError(BoxError.EXTERNAL_ERROR, `Error listing objects in ${dir}. Message: ${error.message} HTTP Code: ${error.code}`));

            const contents = listData.Contents;
            if (contents.length === 0) {
                finished = true;
                return next();
            }

            const entries = contents.map((c) => ({ fullPath: c.Key, size: c.Size }));

            iteratorCallback(entries, function (iteratorError) {
                if (iteratorError) return next(iteratorError);

                if (!listData.IsTruncated) {
                    finished = true;
                    return next();
                }

                // NextMarker is returned only with delimiter, so continue after the last key seen
                request.Marker = contents[contents.length - 1].Key;
                next();
            });
        });
    }

    async.whilst(hasMore, fetchBatch, callback);
}
// https://github.com/aws/aws-sdk-js/blob/2b6bcbdec1f274fe931640c1b61ece999aae7a19/lib/util.js#L41
// https://github.com/GeorgePhillips/node-s3-url-encode/blob/master/index.js
// See aws-sdk-js/issues/1302
/**
 * Builds the CopySource value ("/bucket/key") with the key percent-encoded.
 * On top of encodeURI, a set of extra characters is percent-encoded as well.
 *
 * @param {string} bucket - source bucket name (inserted as-is)
 * @param {string} path - source object key
 * @returns {string} encoded copy source
 */
function encodeCopySource(bucket, path) {
    const toPercentHex = (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`;

    // AWS percent-encodes some extra non-standard characters in a URI
    const encodedKey = encodeURI(path).replace(/[+!"#$@&'()*+,:;=?@]/g, toPercentHex);

    // the slash at the beginning is optional
    return `/${bucket}/${encodedKey}`;
}
/**
 * Copies every object listed under `oldFilePath` to the matching key under `newFilePath`
 * within the configured bucket.
 *
 * Objects are listed in batches of 1000 via listDir and each batch is copied with a bounded
 * concurrency. Objects below the size limit use a single copyObject call; larger ones go through
 * a multipart copy (createMultipartUpload / uploadPartCopy / completeMultipartUpload).
 *
 * @param {object} apiConfig - backup configuration
 * @param {string} oldFilePath - source prefix
 * @param {string} newFilePath - destination prefix
 * @param {function} progressCallback - called with { message } progress objects
 * @throws the first error reported while listing or copying (BoxError NOT_FOUND / EXTERNAL_ERROR from copyFile)
 */
async function copy(apiConfig, oldFilePath, newFilePath, progressCallback) {
assert.strictEqual(typeof apiConfig, 'object');
assert.strictEqual(typeof oldFilePath, 'string');
assert.strictEqual(typeof newFilePath, 'string');
assert.strictEqual(typeof progressCallback, 'function');
// Copies a single listed entry ({ fullPath, size }); iteratorCallback receives a BoxError on failure
function copyFile(entry, iteratorCallback) {
const credentials = getS3Config(apiConfig);
const s3 = new aws.S3(credentials);
// key of the entry relative to the source prefix; re-rooted under newFilePath below
const relativePath = path.relative(oldFilePath, entry.fullPath);
// Common completion handler: logs the error and maps it to a BoxError
function done(error) {
if (error) debug(`copy: s3 copy error when copying ${entry.fullPath}: ${error}`);
if (error && S3_NOT_FOUND(error)) return iteratorCallback(new BoxError(BoxError.NOT_FOUND, `Old backup not found: ${entry.fullPath}`));
if (error) return iteratorCallback(new BoxError(BoxError.EXTERNAL_ERROR, `Error copying ${entry.fullPath} (${entry.size} bytes): ${error.code || ''} ${error}`));
iteratorCallback(null);
}
const copyParams = {
Bucket: apiConfig.bucket,
Key: path.join(newFilePath, relativePath)
};
// S3 copyObject has a file size limit of 5GB so if we have larger files, we do a multipart copy
// Exoscale and B2 take too long to copy 5GB
const largeFileLimit = (apiConfig.provider === 'exoscale-sos' || apiConfig.provider === 'backblaze-b2' || apiConfig.provider === 'digitalocean-spaces') ? 1024 * 1024 * 1024 : 5 * 1024 * 1024 * 1024;
// small object: single copyObject request
if (entry.size < largeFileLimit) {
progressCallback({ message: `Copying ${relativePath || oldFilePath}` });
copyParams.CopySource = encodeCopySource(apiConfig.bucket, entry.fullPath);
s3.copyObject(copyParams, done).on('retry', function (response) {
progressCallback({ message: `Retrying (${response.retryCount+1}) copy of ${relativePath || oldFilePath}. Error: ${response.error} ${response.httpResponse.statusCode}` });
// on DO, we get a random 408. these are not retried by the SDK
if (response.error) response.error.retryable = true; // https://github.com/aws/aws-sdk-js/issues/412
});
return;
}
// large object: multipart copy
progressCallback({ message: `Copying (multipart) ${relativePath || oldFilePath}` });
s3.createMultipartUpload(copyParams, function (error, multipart) {
if (error) return done(error);
// Exoscale (96M) was suggested by exoscale. 1GB - rather random size for others
const chunkSize = apiConfig.provider === 'exoscale-sos' ? 96 * 1024 * 1024 : 1024 * 1024 * 1024;
const uploadId = multipart.UploadId;
let uploadedParts = [], ranges = [];
// split [0, entry.size) into inclusive byte ranges of chunkSize; the last range takes the remainder
let cur = 0;
while (cur + chunkSize < entry.size) {
ranges.push({ startBytes: cur, endBytes: cur + chunkSize - 1 });
cur += chunkSize;
}
ranges.push({ startBytes: cur, endBytes: entry.size-1 });
// copy up to 3 parts at a time
async.eachOfLimit(ranges, 3, function copyChunk(range, index, iteratorDone) {
const partCopyParams = {
Bucket: apiConfig.bucket,
Key: path.join(newFilePath, relativePath),
CopySource: encodeCopySource(apiConfig.bucket, entry.fullPath), // See aws-sdk-js/issues/1302
CopySourceRange: 'bytes=' + range.startBytes + '-' + range.endBytes,
PartNumber: index+1,
UploadId: uploadId
};
progressCallback({ message: `Copying part ${partCopyParams.PartNumber} - ${partCopyParams.CopySource} ${partCopyParams.CopySourceRange}` });
s3.uploadPartCopy(partCopyParams, function (error, part) {
if (error) return iteratorDone(error);
progressCallback({ message: `Copying part ${partCopyParams.PartNumber} - Etag: ${part.CopyPartResult.ETag}` });
if (!part.CopyPartResult.ETag) return iteratorDone(new Error('Multi-part copy is broken or not implemented by the S3 storage provider'));
// stored by index so the parts list stays ordered even though parts finish out of order
uploadedParts[index] = { ETag: part.CopyPartResult.ETag, PartNumber: partCopyParams.PartNumber };
iteratorDone();
}).on('retry', function (response) {
progressCallback({ message: `Retrying (${response.retryCount+1}) multipart copy of ${relativePath || oldFilePath}. Error: ${response.error} ${response.httpResponse.statusCode}` });
});
}, function chunksCopied(error) {
if (error) { // we must still recommend the user to set a AbortIncompleteMultipartUpload lifecycle rule
const abortParams = {
Bucket: apiConfig.bucket,
Key: path.join(newFilePath, relativePath),
UploadId: uploadId
};
progressCallback({ message: `Aborting multipart copy of ${relativePath || oldFilePath}` });
return s3.abortMultipartUpload(abortParams, () => done(error)); // ignore any abort errors
}
const completeMultipartParams = {
Bucket: apiConfig.bucket,
Key: path.join(newFilePath, relativePath),
MultipartUpload: { Parts: uploadedParts },
UploadId: uploadId
};
progressCallback({ message: `Finishing multipart copy - ${completeMultipartParams.Key}` });
s3.completeMultipartUpload(completeMultipartParams, done);
});
});
}
// running count of listed entries, used only for progress messages
let total = 0;
const concurrency = apiConfig.copyConcurrency || (apiConfig.provider === 's3' ? 500 : 10);
progressCallback({ message: `Copying with concurrency of ${concurrency}` });
const listDirAsync = util.promisify(listDir);
const [copyError] = await safe(listDirAsync(apiConfig, oldFilePath, 1000, function listDirIterator(entries, done) {
total += entries.length;
progressCallback({ message: `Copying files from ${total-entries.length}-${total}` });
async.eachLimit(entries, concurrency, copyFile, done);
}));
progressCallback({ message: `Copied ${total} files with error: ${copyError}` });
if (copyError) throw copyError;
}
/**
 * Deletes a single object from the bucket.
 *
 * @param {object} apiConfig - backup configuration
 * @param {string} filename - key of the object to delete
 * @throws {BoxError} EXTERNAL_ERROR when the delete request fails
 */
async function remove(apiConfig, filename) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof filename, 'string');

    const credentials = getS3Config(apiConfig);
    const s3 = new aws.S3(credentials);

    const deleteParams = {
        Bucket: apiConfig.bucket,
        Delete: {
            Objects: [{ Key: filename }]
        }
    };

    // deleteObjects does not return error if key is not found
    const [error] = await safe(s3.deleteObjects(deleteParams).promise());
    // deleteParams has no top-level Key, so report the filename (this used to print "undefined")
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to remove ${filename}. error: ${error.message}`);
}
/**
 * Splits an array into consecutive slices of at most `size` elements.
 *
 * @param {Array} array - input array (not modified)
 * @param {number} size - maximum length of each slice
 * @returns {Array<Array>} the slices in order; an empty array for empty input
 */
function chunk(array, size) {
    assert(Array.isArray(array));
    assert.strictEqual(typeof size, 'number');

    const total = array.length;
    if (total === 0) return [];

    const pieces = Array(Math.ceil(total / size));
    let slot = 0;
    let start = 0;

    while (start < total) {
        pieces[slot] = array.slice(start, start + size);
        slot += 1;
        start += size;
    }

    return pieces;
}
/**
 * Deletes every object under `pathPrefix`.
 *
 * Objects are listed in batches of 1000 via listDir; each batch is split into chunks and
 * every chunk is removed with one deleteObjects request, one chunk after the other.
 *
 * @param {object} apiConfig - backup configuration; `deleteConcurrency` overrides the chunk size
 * @param {string} pathPrefix - key prefix to delete
 * @param {function} progressCallback - called with { message } progress objects
 * @throws {BoxError} EXTERNAL_ERROR when listing or a delete request fails
 */
async function removeDir(apiConfig, pathPrefix, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof pathPrefix, 'string');
    assert.strictEqual(typeof progressCallback, 'function');

    const credentials = getS3Config(apiConfig);
    const s3 = new aws.S3(credentials);
    const listDirAsync = util.promisify(listDir);

    // throttle objects in each request; does not change between batches, so compute it once
    const chunkSize = apiConfig.deleteConcurrency || (apiConfig.provider !== 'digitalocean-spaces' ? 1000 : 100);

    let total = 0;

    await listDirAsync(apiConfig, pathPrefix, 1000, function listDirIterator(entries, done) {
        total += entries.length;

        const chunks = chunk(entries, chunkSize);

        async.eachSeries(chunks, async function deleteFiles(objects) {
            const deleteParams = {
                Bucket: apiConfig.bucket,
                Delete: {
                    Objects: objects.map(function (o) { return { Key: o.fullPath }; })
                }
            };

            // deleteParams has no top-level Key, so messages describe the key range instead (this used to print "undefined")
            const range = `${objects.length} files from ${objects[0].fullPath} to ${objects[objects.length-1].fullPath}`;

            progressCallback({ message: `Removing ${range}` });

            // deleteObjects does not return error if key is not found
            const [error] = await safe(s3.deleteObjects(deleteParams).promise());
            if (error) {
                progressCallback({ message: `Unable to remove ${range}: ${error.message || error.code}` });
                throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to remove ${range}. error: ${error.message}`);
            }
        }, done);
    });

    progressCallback({ message: `Removed ${total} files` });
}
/**
 * No-op for this backend: only validates the argument type.
 *
 * @param {object} apiConfig - backup configuration
 * @returns {Promise<void>}
 */
async function remount(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');
}
/**
 * Validates a backup configuration and verifies it against the storage endpoint by
 * uploading a test object, listing its prefix and deleting the object again.
 *
 * @param {object} apiConfig - backup configuration to validate
 * @throws {BoxError} BAD_FIELD for an invalid field, EXTERNAL_ERROR when put, list or delete fails
 */
async function testConfig(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');

    const isString = (value) => typeof value === 'string';
    const isBoolean = (value) => typeof value === 'boolean';

    // required fields
    if (!isString(apiConfig.accessKeyId)) throw new BoxError(BoxError.BAD_FIELD, 'accessKeyId must be a string');
    if (!isString(apiConfig.secretAccessKey)) throw new BoxError(BoxError.BAD_FIELD, 'secretAccessKey must be a string');
    if (!isString(apiConfig.bucket)) throw new BoxError(BoxError.BAD_FIELD, 'bucket must be a string');

    const bucket = apiConfig.bucket;
    // the node module seems to incorrectly accept bucket name with '/'
    if (bucket.includes('/')) throw new BoxError(BoxError.BAD_FIELD, 'bucket name cannot contain "/"');
    // names must be lowercase and start with a letter or number. can contain dashes
    if (bucket.includes('_') || bucket.match(/[A-Z]/)) throw new BoxError(BoxError.BAD_FIELD, 'bucket name cannot contain "_" or capitals');

    if (!isString(apiConfig.prefix)) throw new BoxError(BoxError.BAD_FIELD, 'prefix must be a string');

    // optional fields are only type-checked when present
    if ('signatureVersion' in apiConfig && !isString(apiConfig.signatureVersion)) throw new BoxError(BoxError.BAD_FIELD, 'signatureVersion must be a string');
    if ('endpoint' in apiConfig && !isString(apiConfig.endpoint)) throw new BoxError(BoxError.BAD_FIELD, 'endpoint must be a string');
    if ('acceptSelfSignedCerts' in apiConfig && !isBoolean(apiConfig.acceptSelfSignedCerts)) throw new BoxError(BoxError.BAD_FIELD, 'acceptSelfSignedCerts must be a boolean');
    if ('s3ForcePathStyle' in apiConfig && !isBoolean(apiConfig.s3ForcePathStyle)) throw new BoxError(BoxError.BAD_FIELD, 's3ForcePathStyle must be a boolean');

    // attempt to upload and delete a file with new credentials
    const s3 = new aws.S3(_.omit(getS3Config(apiConfig), 'retryDelayOptions', 'maxRetries'));
    const testKey = path.join(apiConfig.prefix, 'snapshot/cloudron-testfile');

    const [putError] = await safe(s3.putObject({ Bucket: bucket, Key: testKey, Body: 'testcontent' }).promise());
    if (putError) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error put object cloudron-testfile. Message: ${putError.message} HTTP Code: ${putError.code}`);

    const listParams = {
        Bucket: bucket,
        Prefix: path.join(apiConfig.prefix, 'snapshot'),
        MaxKeys: 1
    };
    const [listError] = await safe(s3.listObjects(listParams).promise());
    if (listError) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error listing objects. Message: ${listError.message} HTTP Code: ${listError.code}`);

    const [delError] = await safe(s3.deleteObject({ Bucket: bucket, Key: testKey }).promise());
    if (delError) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error del object cloudron-testfile. Message: ${delError.message} HTTP Code: ${delError.code}`);
}
/**
 * Overwrites the secret access key of `apiConfig` with the placeholder constant.
 * The object is modified in place and returned.
 *
 * @param {object} apiConfig - backup configuration
 * @returns {object} the same apiConfig object
 */
function removePrivateFields(apiConfig) {
    return Object.assign(apiConfig, { secretAccessKey: constants.SECRET_PLACEHOLDER });
}
/**
 * If `newConfig` still carries the placeholder instead of a secret access key,
 * fills in the key from `currentConfig`. Modifies `newConfig` in place.
 *
 * @param {object} newConfig - incoming configuration
 * @param {object} currentConfig - configuration holding the real secret
 */
function injectPrivateFields(newConfig, currentConfig) {
    const hasPlaceholder = newConfig.secretAccessKey === constants.SECRET_PLACEHOLDER;
    if (!hasPlaceholder) return;

    newConfig.secretAccessKey = currentConfig.secretAccessKey;
}