'use strict';

exports = module.exports = {
    getAvailableSize,
    upload,
    exists,
    download,
    copy,
    listDir,
    remove,
    removeDir,
    cleanup,
    testConfig,

    removePrivateFields,
    injectPrivateFields,

    // Used to mock AWS
    _chunk: chunk
};

const assert = require('assert'),
    async = require('async'),
    BoxError = require('../boxerror.js'),
    { ConfiguredRetryStrategy } = require('@smithy/util-retry'),
    constants = require('../constants.js'),
    consumers = require('node:stream/consumers'),
    debug = require('debug')('box:storage/s3'),
    http = require('http'),
    https = require('https'),
    { NodeHttpHandler } = require('@smithy/node-http-handler'),
    { PassThrough } = require('node:stream'),
    path = require('path'),
    { Readable } = require('stream'),
    { S3, NoSuchKey, NoSuchBucket } = require('@aws-sdk/client-s3'),
    safe = require('safetydance'),
    { Upload } = require('@aws-sdk/lib-storage');

function S3_NOT_FOUND(error) {
    return error instanceof NoSuchKey || error instanceof NoSuchBucket;
}

const RETRY_STRATEGY = new ConfiguredRetryStrategy(10 /* max attempts */, (/* attempt */) => 20000 /* constant backoff */);

function createS3Client(apiConfig, options) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof options, 'object');

    const credentials = {
        accessKeyId: apiConfig.accessKeyId,
        secretAccessKey: apiConfig.secretAccessKey
    };

    const requestHandler = new NodeHttpHandler({
        connectionTimeout: 60000,
        socketTimeout: 20 * 60 * 1000
    });

    // sdk v3 only supports signature version 4
    const clientConfig = {
        forcePathStyle: apiConfig.s3ForcePathStyle === true, // use vhost style instead of path style by default - https://forums.aws.amazon.com/ann.jspa?annID=6776
        region: apiConfig.region || 'us-east-1',
        credentials,
        requestHandler
    };

    if (options.retryStrategy) clientConfig.retryStrategy = options.retryStrategy;
    if (apiConfig.endpoint) clientConfig.endpoint = apiConfig.endpoint; // s3 endpoint names come from the SDK

    const isHttps = clientConfig.endpoint?.startsWith('https://') || apiConfig.provider === 's3';
    if (isHttps) {
        if (apiConfig.acceptSelfSignedCerts || apiConfig.bucket.includes('.')) {
            requestHandler.agent = new https.Agent({ rejectUnauthorized: false });
        }
    } else { // http agent is required for http endpoints
        requestHandler.agent = new http.Agent({});
    }

    return constants.TEST ? new globalThis.S3Mock(clientConfig) : new S3(clientConfig);
}

async function getAvailableSize(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');

    return Number.POSITIVE_INFINITY;
}

async function upload(apiConfig, backupFilePath) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });

    // s3.upload automatically does a multi-part upload. we set queueSize to 3 to reduce memory usage.
    // the uploader will buffer at most queueSize * partSize bytes into memory at any given time.
    // scaleway only supports 1000 parts per object (https://www.scaleway.com/en/docs/s3-multipart-upload/)
    // s3: https://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html (max 10k parts and no size limit on the last part!)
    const partSize = apiConfig.limits?.uploadPartSize || (apiConfig.provider === 'scaleway-objectstorage' ? 100 * 1024 * 1024 : 10 * 1024 * 1024);

    const passThrough = new PassThrough();

    const options = {
        client: s3,
        params: {
            Bucket: apiConfig.bucket,
            Key: backupFilePath,
            Body: passThrough
        },
        partSize,
        queueSize: 3,
        leavePartsOnError: false
    };

    const managedUpload = constants.TEST ? new globalThis.S3MockUpload(options) : new Upload(options);
    managedUpload.on('httpUploadProgress', (progress) => debug(`Upload progress: ${JSON.stringify(progress)}`));

    const uploadPromise = managedUpload.done();

    return {
        stream: passThrough,
        async finish() {
            const [error, data] = await safe(uploadPromise);
            if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Upload error: code: ${error.Code} message: ${error.message}`); // sometimes message is null
            debug(`Upload finished. ${JSON.stringify(data)}`);
        }
    };
}
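// A minimal usage sketch of upload() - an assumed caller pattern, not part of this module.
// 'tarPackStream' is a hypothetical readable source:
//
//   const { stream, finish } = await upload(apiConfig, 'snapshot/box.tar.gz');
//   tarPackStream.pipe(stream); // write the backup contents into the PassThrough
//   await finish();             // resolves when the multi-part upload completes, throws BoxError on failure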
async function exists(apiConfig, backupFilePath) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');

    const s3 = createS3Client(apiConfig, { retryStrategy: null });

    if (!backupFilePath.endsWith('/')) { // check for file
        const params = {
            Bucket: apiConfig.bucket,
            Key: backupFilePath
        };

        const [error, response] = await safe(s3.headObject(params));
        if (error && S3_NOT_FOUND(error)) return false;
        if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error headObject ${backupFilePath}. Message: ${error.message} HTTP Code: ${error.$metadata.httpStatusCode}`);
        if (!response || typeof response.Metadata !== 'object') throw new BoxError(BoxError.EXTERNAL_ERROR, 'not an s3 endpoint');
        return true;
    } else { // list dir contents
        const listParams = {
            Bucket: apiConfig.bucket,
            Prefix: backupFilePath,
            MaxKeys: 1
        };

        const [error, listData] = await safe(s3.listObjectsV2(listParams));
        if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error listing objects ${backupFilePath}. Message: ${error.message} HTTP Code: ${error.$metadata.httpStatusCode}`);
        return listData.Contents.length !== 0;
    }
}

// Download the object in small parts. By downloading small parts, we reduce the chance of sporadic network errors when downloading large objects.
// We can retry each part individually, but we haven't had the need for this yet.
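// A worked example of the ranged reads below (illustrative numbers only): with the default 64MB block size,
// a 200MB object (200 * 1024 * 1024 bytes) is fetched as Range: bytes=0-67108863, bytes=67108864-134217727,
// bytes=134217728-201326591 and finally bytes=201326592-209715199.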
class S3MultipartDownloadStream extends Readable {
    constructor(s3, params, options) {
        super(options);

        this._s3 = s3;
        this._params = params;
        this._readSize = 0;
        this._fileSize = -1;
        this._path = params.Bucket + '/' + params.Key;
        this._blockSize = options.blockSize || 64 * 1048576; // 64MB default
    }

    _done() {
        this._readSize = 0;
        this.push(null); // EOF
    }

    _handleError(error) {
        if (S3_NOT_FOUND(error)) {
            this.destroy(new BoxError(BoxError.NOT_FOUND, `Backup not found: ${this._path}`));
        } else {
            debug(`download: ${this._path} s3 stream error. %o`, error);
            this.destroy(new BoxError(BoxError.EXTERNAL_ERROR, `Error multipartDownload ${this._path}. Message: ${error.message} HTTP Code: ${error.$metadata.httpStatusCode}`));
        }
    }

    async _downloadRange(offset, length) {
        const params = Object.assign({}, this._params);
        const lastPos = offset + length - 1;
        const range = `bytes=${offset}-${lastPos}`;
        params['Range'] = range;

        const [error, data] = await safe(this._s3.getObject(params));
        if (error) return this._handleError(error);

        const contentLength = parseInt(data.ContentLength, 10); // should be same as length
        if (contentLength > 0) {
            this._readSize += contentLength;
            const body = await consumers.buffer(data.Body); // data.Body.transformToString('binary') also works
            this.push(body);
        } else {
            this._done();
        }
    }

    _nextDownload() {
        let len = 0;
        if (this._readSize + this._blockSize < this._fileSize) {
            len = this._blockSize;
        } else {
            len = this._fileSize - this._readSize;
        }

        this._downloadRange(this._readSize, len);
    }

    async _fetchSize() {
        const [error, data] = await safe(this._s3.headObject(this._params));
        if (error) return this._handleError(error);

        const length = parseInt(data.ContentLength, 10);
        if (length > 0) {
            this._fileSize = length;
            this._nextDownload();
        } else {
            this._done();
        }
    }

    _read() { // Readable _read() implementation
        if (this._readSize === this._fileSize) return this._done();
        if (this._readSize === 0) return this._fetchSize();
        this._nextDownload();
    }
}

async function download(apiConfig, backupFilePath) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');

    const params = {
        Bucket: apiConfig.bucket,
        Key: backupFilePath
    };

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });

    return new S3MultipartDownloadStream(s3, params, { blockSize: 64 * 1024 * 1024 });
}
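// A minimal usage sketch of download() - an assumed caller pattern, not part of this module
// (fs is the caller's own require):
//
//   const stream = await download(apiConfig, 'snapshot/box.tar.gz');
//   stream.pipe(fs.createWriteStream('/tmp/box.tar.gz'));
//   stream.on('error', (error) => { /* BoxError.NOT_FOUND or BoxError.EXTERNAL_ERROR */ });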
async function listDir(apiConfig, dir, batchSize, marker) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof dir, 'string');
    assert.strictEqual(typeof batchSize, 'number');
    assert(typeof marker !== 'undefined');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });

    const listParams = {
        Bucket: apiConfig.bucket,
        Prefix: dir,
        MaxKeys: batchSize
    };
    if (marker) listParams.ContinuationToken = marker;

    const [error, listData] = await safe(s3.listObjectsV2(listParams));
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error listing objects in ${dir}. Message: ${error.message} HTTP Code: ${error.$metadata.httpStatusCode}`);

    if (listData.Contents.length === 0) return { entries: [], marker: null }; // no more

    const entries = listData.Contents.map(function (c) { return { fullPath: c.Key, size: c.Size }; });

    return { entries, marker: listData.IsTruncated ? listData.NextContinuationToken : null };
}

// https://github.com/aws/aws-sdk-js/blob/2b6bcbdec1f274fe931640c1b61ece999aae7a19/lib/util.js#L41
// https://github.com/GeorgePhillips/node-s3-url-encode/blob/master/index.js
// See aws-sdk-js/issues/1302
function encodeCopySource(bucket, path) {
    // AWS percent-encodes some extra non-standard characters in a URI
    const output = encodeURI(path).replace(/[+!"#$@&'()*,:;=?]/g, function (ch) {
        return '%' + ch.charCodeAt(0).toString(16).toUpperCase();
    });

    // the slash at the beginning is optional
    return `/${bucket}/${output}`;
}
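// For illustration (hypothetical object name): encodeCopySource('my-bucket', 'backup/app v1+2.tar.gz')
// returns '/my-bucket/backup/app%20v1%2B2.tar.gz' - encodeURI() handles the space while the replace()
// above covers characters like '+' that encodeURI() leaves alone.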
async function copyFile(apiConfig, oldFilePath, newFilePath, entry, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof oldFilePath, 'string');
    assert.strictEqual(typeof newFilePath, 'string');
    assert.strictEqual(typeof entry, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY }); // https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html

    const relativePath = path.relative(oldFilePath, entry.fullPath);

    function throwError(error) {
        if (error) debug(`copy: s3 copy error when copying ${entry.fullPath}: ${error}`);

        if (error && S3_NOT_FOUND(error)) throw new BoxError(BoxError.NOT_FOUND, `Old backup not found: ${entry.fullPath}`);
        if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error copying ${entry.fullPath} (${entry.size} bytes): ${error.Code || ''} ${error}`);
    }

    const copyParams = {
        Bucket: apiConfig.bucket,
        Key: path.join(newFilePath, relativePath)
    };

    // S3 copyObject has a file size limit of 5GB so if we have larger files, we do a multipart copy
    const largeFileLimit = (apiConfig.provider === 'vultr-objectstorage' || apiConfig.provider === 'exoscale-sos' || apiConfig.provider === 'backblaze-b2' || apiConfig.provider === 'digitalocean-spaces') ? 1024 * 1024 * 1024 : 3 * 1024 * 1024 * 1024;

    if (entry.size < largeFileLimit) {
        progressCallback({ message: `Copying ${relativePath || oldFilePath}` });

        copyParams.CopySource = encodeCopySource(apiConfig.bucket, entry.fullPath);

        const [copyError] = await safe(s3.copyObject(copyParams));
        if (copyError) return throwError(copyError);

        return;
    }

    progressCallback({ message: `Copying (multipart) ${relativePath || oldFilePath}` });

    const [createMultipartError, multipart] = await safe(s3.createMultipartUpload(copyParams));
    if (createMultipartError) return throwError(createMultipartError);

    // Exoscale (96M) was suggested by exoscale. 1GB for others is an arbitrary size
    const chunkSize = apiConfig.provider === 'exoscale-sos' ? 96 * 1024 * 1024 : 1024 * 1024 * 1024;

    const uploadId = multipart.UploadId;
    const uploadedParts = [], ranges = [];

    let cur = 0;
    while (cur + chunkSize < entry.size) {
        ranges.push({ startBytes: cur, endBytes: cur + chunkSize - 1 });
        cur += chunkSize;
    }
    ranges.push({ startBytes: cur, endBytes: entry.size - 1 });

    const [copyError] = await safe(async.eachOfLimit(ranges, 3, async function copyChunk(range, index) {
        const partCopyParams = {
            Bucket: apiConfig.bucket,
            Key: path.join(newFilePath, relativePath),
            CopySource: encodeCopySource(apiConfig.bucket, entry.fullPath), // See aws-sdk-js/issues/1302
            CopySourceRange: 'bytes=' + range.startBytes + '-' + range.endBytes,
            PartNumber: index + 1,
            UploadId: uploadId
        };

        progressCallback({ message: `Copying part ${partCopyParams.PartNumber} - ${partCopyParams.CopySource} ${partCopyParams.CopySourceRange}` });

        const part = await s3.uploadPartCopy(partCopyParams);

        progressCallback({ message: `Copied part ${partCopyParams.PartNumber} - Etag: ${part.CopyPartResult.ETag}` });

        if (!part.CopyPartResult.ETag) throw new Error('Multi-part copy is broken or not implemented by the S3 storage provider');

        uploadedParts[index] = { ETag: part.CopyPartResult.ETag, PartNumber: partCopyParams.PartNumber };
    }));

    if (copyError) {
        const abortParams = {
            Bucket: apiConfig.bucket,
            Key: path.join(newFilePath, relativePath),
            UploadId: uploadId
        };

        progressCallback({ message: `Aborting multipart copy of ${relativePath || oldFilePath}` });
        await safe(s3.abortMultipartUpload(abortParams), { debug }); // ignore any abort errors

        return throwError(copyError);
    }

    const completeMultipartParams = {
        Bucket: apiConfig.bucket,
        Key: path.join(newFilePath, relativePath),
        MultipartUpload: { Parts: uploadedParts },
        UploadId: uploadId
    };

    progressCallback({ message: `Finishing multipart copy - ${completeMultipartParams.Key}` });

    const [completeMultipartError] = await safe(s3.completeMultipartUpload(completeMultipartParams));
    if (completeMultipartError) return throwError(completeMultipartError);
}

async function copy(apiConfig, oldFilePath, newFilePath, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof oldFilePath, 'string');
    assert.strictEqual(typeof newFilePath, 'string');
    assert.strictEqual(typeof progressCallback, 'function');

    let total = 0;
    const concurrency = apiConfig.limits?.copyConcurrency || (apiConfig.provider === 's3' ? 500 : 10);
    progressCallback({ message: `Copying with concurrency of ${concurrency}` });

    let marker = null;
    while (true) {
        const batch = await listDir(apiConfig, oldFilePath, 1000, marker);
        total += batch.entries.length;
        progressCallback({ message: `Copying files from ${total - batch.entries.length}-${total}` });

        await async.eachLimit(batch.entries, concurrency, async (entry) => await copyFile(apiConfig, oldFilePath, newFilePath, entry, progressCallback));

        if (!batch.marker) break;
        marker = batch.marker;
    }

    progressCallback({ message: `Copied ${total} files` });
}

async function remove(apiConfig, filename) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof filename, 'string');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });

    const deleteParams = {
        Bucket: apiConfig.bucket,
        Delete: {
            Objects: [{ Key: filename }]
        }
    };

    // deleteObjects does not return error if key is not found
    const [error] = await safe(s3.deleteObjects(deleteParams));
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to remove ${filename}. error: ${error.message}`);
}

function chunk(array, size) {
    assert(Array.isArray(array));
    assert.strictEqual(typeof size, 'number');

    const length = array.length;
    if (!length) return [];

    let index = 0, resIndex = 0;
    const result = Array(Math.ceil(length / size));

    for (; index < length; index += size) {
        result[resIndex++] = array.slice(index, index + size);
    }

    return result;
}
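// For illustration: chunk([1, 2, 3, 4, 5], 2) returns [[1, 2], [3, 4], [5]] - the last chunk simply
// holds whatever is left over.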
async function removeDir(apiConfig, pathPrefix, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof pathPrefix, 'string');
    assert.strictEqual(typeof progressCallback, 'function');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });

    let total = 0;
    let marker = null;
    while (true) {
        const batch = await listDir(apiConfig, pathPrefix, 1000, marker);
        const entries = batch.entries;
        total += entries.length;

        const chunkSize = apiConfig.limits?.deleteConcurrency || (apiConfig.provider !== 'digitalocean-spaces' ? 1000 : 100); // throttle objects in each request
        const chunks = chunk(entries, chunkSize);

        await async.eachSeries(chunks, async function deleteFiles(objects) {
            const deleteParams = {
                Bucket: apiConfig.bucket,
                Delete: {
                    Objects: objects.map(function (o) { return { Key: o.fullPath }; })
                }
            };

            progressCallback({ message: `Removing ${objects.length} files from ${objects[0].fullPath} to ${objects[objects.length - 1].fullPath}` });

            // deleteObjects does not return error if key is not found
            const [error] = await safe(s3.deleteObjects(deleteParams));
            if (error) {
                progressCallback({ message: `Unable to remove ${objects.length} files: ${error.message || error.Code}` });
                throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to remove ${objects.length} files under ${pathPrefix}. error: ${error.message}`);
            }
        });

        if (!batch.marker) break;
        marker = batch.marker;
    }

    progressCallback({ message: `Removed ${total} files` });
}

// often, the AbortIncompleteMultipartUpload lifecycle rule is not added to the bucket, resulting in large bucket sizes over time
async function cleanup(apiConfig, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });

    const uploads = await s3.listMultipartUploads({ Bucket: apiConfig.bucket, Prefix: apiConfig.prefix });
    progressCallback({ message: `Cleaning up any aborted multi-part uploads. count:${uploads.Uploads?.length || 0} truncated:${uploads.IsTruncated}` });
    if (!uploads.Uploads) return;

    for (const upload of uploads.Uploads) {
        if (Date.now() - new Date(upload.Initiated) < 3 * 24 * 60 * 60 * 1000) continue; // only clean up uploads older than 3 days

        progressCallback({ message: `Cleaning up multi-part upload uploadId:${upload.UploadId} key:${upload.Key}` });
        await safe(s3.abortMultipartUpload({ Bucket: apiConfig.bucket, Key: upload.Key, UploadId: upload.UploadId }), { debug }); // ignore error
    }
}
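// For illustration only - an apiConfig shape accepted by testConfig() below. Values are made up and
// testConfig() only type-checks the fields it lists:
//
//   {
//     provider: 's3',                      // or 'scaleway-objectstorage', 'digitalocean-spaces', ... (provider ids referenced in this file)
//     accessKeyId: 'AKIA...',
//     secretAccessKey: '...',
//     bucket: 'my-backup-bucket',          // lowercase, no '/', '_' or capitals
//     prefix: 'cloudron-backups',
//     endpoint: 'https://s3.example.com',  // optional, for S3-compatible providers
//     region: 'us-east-1',                 // optional, defaults to us-east-1
//     acceptSelfSignedCerts: false,        // optional
//     s3ForcePathStyle: true               // optional
//   }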
async function testConfig(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');

    if (typeof apiConfig.accessKeyId !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'accessKeyId must be a string');
    if (typeof apiConfig.secretAccessKey !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'secretAccessKey must be a string');
    if (typeof apiConfig.bucket !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'bucket must be a string');
    // the node module seems to incorrectly accept bucket names with '/'
    if (apiConfig.bucket.includes('/')) throw new BoxError(BoxError.BAD_FIELD, 'bucket name cannot contain "/"');
    // names must be lowercase and start with a letter or number. they can contain dashes
    if (apiConfig.bucket.includes('_') || apiConfig.bucket.match(/[A-Z]/)) throw new BoxError(BoxError.BAD_FIELD, 'bucket name cannot contain "_" or capitals');
    if (typeof apiConfig.prefix !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'prefix must be a string');

    if ('signatureVersion' in apiConfig && typeof apiConfig.signatureVersion !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'signatureVersion must be a string');
    if ('endpoint' in apiConfig && typeof apiConfig.endpoint !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'endpoint must be a string');
    if ('acceptSelfSignedCerts' in apiConfig && typeof apiConfig.acceptSelfSignedCerts !== 'boolean') throw new BoxError(BoxError.BAD_FIELD, 'acceptSelfSignedCerts must be a boolean');
    if ('s3ForcePathStyle' in apiConfig && typeof apiConfig.s3ForcePathStyle !== 'boolean') throw new BoxError(BoxError.BAD_FIELD, 's3ForcePathStyle must be a boolean');

    const putParams = {
        Bucket: apiConfig.bucket,
        Key: path.join(apiConfig.prefix, 'snapshot/cloudron-testfile'),
        Body: 'testcontent'
    };

    const s3 = createS3Client(apiConfig, {});

    const [putError] = await safe(s3.putObject(putParams));
    if (putError) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error put object cloudron-testfile. Message: ${putError.message} HTTP Code: ${putError.$metadata.httpStatusCode}`);

    const listParams = {
        Bucket: apiConfig.bucket,
        Prefix: path.join(apiConfig.prefix, 'snapshot'),
        MaxKeys: 1
    };

    const [listError] = await safe(s3.listObjectsV2(listParams));
    if (listError) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error listing objects. Message: ${listError.message} HTTP Code: ${listError.$metadata.httpStatusCode}`);

    const delParams = {
        Bucket: apiConfig.bucket,
        Key: path.join(apiConfig.prefix, 'snapshot/cloudron-testfile')
    };

    const [delError] = await safe(s3.deleteObject(delParams));
    if (delError) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error del object cloudron-testfile. Message: ${delError.message} HTTP Code: ${delError.$metadata.httpStatusCode}`);
}

function removePrivateFields(apiConfig) {
    apiConfig.secretAccessKey = constants.SECRET_PLACEHOLDER;
    return apiConfig;
}

function injectPrivateFields(newConfig, currentConfig) {
    if (newConfig.secretAccessKey === constants.SECRET_PLACEHOLDER) newConfig.secretAccessKey = currentConfig.secretAccessKey;
}
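// A minimal sketch of how the private-field helpers pair up - an assumed caller pattern, not part of this
// module. Note that removePrivateFields() mutates the object it is given, so the caller clones first:
//
//   const redacted = removePrivateFields(structuredClone(storedConfig)); // safe to send to the client
//   injectPrivateFields(configFromClient, storedConfig);                 // restores the secret if the client echoed the placeholder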