2015-08-24 11:13:21 -07:00
'use strict';

// S3-compatible object storage backend. Every exported function takes an
// `apiConfig` object carrying credentials (accessKeyId/secretAccessKey),
// bucket, key prefix and provider-specific settings.
exports = module.exports = {
    setup,
    teardown,
    cleanup,
    verifyConfig,
    removePrivateFields,
    injectPrivateFields,
    getAvailableSize,
    getStatus,
    upload,
    exists,
    download,
    copy,
    copyDir,
    listDir,
    remove,
    removeDir,

    // Used to mock AWS
    _chunk: chunk
};
2025-08-14 11:17:38 +05:30
const assert = require ( 'node:assert' ) ,
2017-09-23 11:09:36 -07:00
async = require ( 'async' ) ,
2019-10-22 20:36:20 -07:00
BoxError = require ( '../boxerror.js' ) ,
2025-02-12 20:56:46 +01:00
{ ConfiguredRetryStrategy } = require ( '@smithy/util-retry' ) ,
2020-05-14 23:01:44 +02:00
constants = require ( '../constants.js' ) ,
2025-02-13 19:23:04 +01:00
consumers = require ( 'node:stream/consumers' ) ,
2025-08-14 11:17:38 +05:30
crypto = require ( 'node:crypto' ) ,
2017-04-18 15:33:06 +02:00
debug = require ( 'debug' ) ( 'box:storage/s3' ) ,
2025-08-14 11:17:38 +05:30
http = require ( 'node:http' ) ,
https = require ( 'node:https' ) ,
2025-02-12 20:56:46 +01:00
{ NodeHttpHandler } = require ( '@smithy/node-http-handler' ) ,
2024-07-05 17:53:35 +02:00
{ PassThrough } = require ( 'node:stream' ) ,
2025-08-14 11:17:38 +05:30
path = require ( 'node:path' ) ,
{ Readable } = require ( 'node:stream' ) ,
2025-06-20 16:04:59 +02:00
{ S3 , NoSuchKey , NoSuchBucket } = require ( '@aws-sdk/client-s3' ) ,
2022-04-14 07:59:50 -05:00
safe = require ( 'safetydance' ) ,
2025-08-01 18:55:04 +02:00
{ Upload } = require ( '@aws-sdk/lib-storage' ) ,
_ = require ( '../underscore.js' ) ;
2017-04-18 19:15:56 +02:00
2018-07-30 07:39:34 -07:00
// Returns true when the error indicates a missing object or a missing bucket
function S3_NOT_FOUND(error) {
    if (error instanceof NoSuchKey) return true;
    return error instanceof NoSuchBucket;
}
2025-07-14 15:45:33 +02:00
// Renders an AWS SDK error into a one-line string for error messages
function formatError(error) {
    // $metadata can be undefined if HTTP request was never sent
    const httpStatusCode = error.$metadata?.httpStatusCode;
    return `code: ${error.Code} message: ${error.message} HTTP: ${httpStatusCode}`;
}
2025-02-12 20:56:46 +01:00
// Retry up to 10 times with a fixed 20-second delay between attempts (constant, not exponential, backoff)
const RETRY_STRATEGY = new ConfiguredRetryStrategy(10 /* max attempts */, (/* attempt */) => 20000 /* constant backoff */);
2025-07-15 00:31:07 +02:00
// AWS decided to use CRC32 for checksums. The Client SDK has then been changed to set this for requests as default.
// requestChecksumCalculation: "WHEN_REQUIRED", responseChecksumValidation: "WHEN_REQUIRED", "checksumAlgorithm": "md5" all don't work
// see also: https://github.com/aws/aws-sdk-js-v3/issues/6810 https://github.com/aws/aws-sdk-js-v3/issues/6819 https://github.com/aws/aws-sdk-js-v3/issues/6761
// this implements https://github.com/aws/aws-sdk-js-v3/blob/main/supplemental-docs/MD5_FALLBACK.md
const md5Middleware = (next, context) => async (args) => {
    // only DeleteObjects needs the MD5 fallback; everything else passes through untouched
    if (context.commandName !== 'DeleteObjectsCommand') return next(args);

    const headers = args.request.headers;

    // Remove any checksum headers added by default middleware. This ensures our Content-MD5 is the primary integrity check
    for (const name of Object.keys(headers)) {
        const lowered = name.toLowerCase();
        if (lowered.startsWith('x-amz-checksum-') || lowered.startsWith('x-amz-sdk-checksum-')) delete headers[name];
    }

    if (args.request.body) {
        const payload = Buffer.from(args.request.body);
        headers['Content-MD5'] = crypto.createHash('md5').update(payload).digest('base64');
    }

    // DO spaces won't respond to 100-continue. not sure why. 76f365f7e8233efb335ba386a9f558b09238b08a has another way to delete this header
    delete headers.Expect;

    return await next(args);
};
2025-02-12 20:56:46 +01:00
// Builds an S3 client from the backend configuration.
// apiConfig: credentials, bucket, region, endpoint, provider-specific flags
// options: { retryStrategy: RetryStrategy|null, deleteObjects: boolean } — deleteObjects
//          installs the MD5 fallback middleware required by non-AWS providers.
function createS3Client(apiConfig, options) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof options, 'object');

    const credentials = {
        accessKeyId: apiConfig.accessKeyId,
        secretAccessKey: apiConfig.secretAccessKey
    };

    const requestHandler = new NodeHttpHandler({
        connectionTimeout: 60000,
        socketTimeout: 20 * 60 * 1000
    });

    // sdk v3 only has signature support v4
    const clientConfig = {
        forcePathStyle: apiConfig.s3ForcePathStyle === true, // Use vhost style instead of path style - https://forums.aws.amazon.com/ann.jspa?annID=6776
        region: apiConfig.region || 'us-east-1',
        credentials,
        requestHandler
        // logger: console
    };
    if (options.retryStrategy) clientConfig.retryStrategy = options.retryStrategy;
    if (apiConfig.endpoint) clientConfig.endpoint = apiConfig.endpoint;

    // s3 endpoint names come from the SDK
    const isHttps = clientConfig.endpoint?.startsWith('https://') || apiConfig._provider === 's3';
    if (isHttps) {
        // bucket names containing '.' break TLS wildcard certificate validation in vhost style
        if (apiConfig.acceptSelfSignedCerts || apiConfig.bucket.includes('.')) {
            requestHandler.agent = new https.Agent({ rejectUnauthorized: false });
        }
    } else { // http agent is required for http endpoints
        requestHandler.agent = new http.Agent({});
    }

    const client = constants.TEST ? new globalThis.S3Mock(clientConfig) : new S3(clientConfig);

    // for a request/response debugging middleware example, see
    // https://github.com/aws/aws-sdk-js-v3/issues/6761#issuecomment-2574480834

    // This ensures it runs after default checksums might be added, but before signing
    if (options.deleteObjects && apiConfig._provider !== 's3') {
        // flexibleChecksumsMiddleware is only present when the request has a body. Only use this for DeleteObjects call. Other requests without a body will crash
        client.middlewareStack.addRelativeTo(md5Middleware, {
            relation: 'after',
            toMiddleware: 'flexibleChecksumsMiddleware',
            name: 'addMD5ChecksumForDeleteObjects',
            tags: ['MD5_FALLBACK'],
        });
    }

    return client;
}
2015-08-25 10:01:04 -07:00
2022-10-02 17:22:44 +02:00
// Object storage exposes no queryable quota, so report unlimited available space
async function getAvailableSize(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');
    return Infinity;
}
2025-08-04 10:47:00 +02:00
// Probes the bucket with a cheap list request to determine whether the storage is reachable.
// Returns { state: 'active'|'inactive', message }
async function getStatus(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');

    const s3 = createS3Client(apiConfig, { retryStrategy: null }); // no retries so status reporting stays fast

    const listParams = {
        Bucket: apiConfig.bucket,
        Prefix: path.join(apiConfig.prefix, 'snapshot'),
        MaxKeys: 1
    };
    const [listError] = await safe(s3.listObjectsV2(listParams));
    // use 'state' on both paths; the error path previously returned 'status' while the success path returned 'state'
    if (listError) return { state: 'inactive', message: `Error listing objects. ${formatError(listError)}` };

    return { state: 'active', message: '' };
}
2025-08-02 01:46:29 +02:00
// Starts a managed multipart upload to `remotePath` (relative to the prefix).
// Returns { stream, finish }: write data into `stream`, then await finish() to
// wait for the upload to complete (throws BoxError.EXTERNAL_ERROR on failure).
async function upload(apiConfig, remotePath) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof remotePath, 'string');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });

    // s3.upload automatically does a multi-part upload. we set queueSize to 3 to reduce memory usage
    // uploader will buffer at most queueSize * partSize bytes into memory at any given time.
    // scaleway only supports 1000 parts per object (https://www.scaleway.com/en/docs/s3-multipart-upload/)
    // s3: https://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html (max 10k parts and no size limit on the last part!)
    const defaultPartSize = apiConfig._provider === 'scaleway-objectstorage' ? 100 * 1024 * 1024 : 10 * 1024 * 1024;
    const partSize = apiConfig.limits?.uploadPartSize || defaultPartSize;

    const passThrough = new PassThrough();

    const uploadOptions = {
        client: s3,
        params: {
            Bucket: apiConfig.bucket,
            Key: path.join(apiConfig.prefix, remotePath),
            Body: passThrough
        },
        partSize,
        queueSize: 3,
        leavePartsOnError: false
    };

    const managedUpload = constants.TEST ? new globalThis.S3MockUpload(uploadOptions) : new Upload(uploadOptions);
    managedUpload.on('httpUploadProgress', (progress) => debug(`Upload progress: ${JSON.stringify(progress)}`));

    const uploadPromise = managedUpload.done();

    return {
        stream: passThrough,
        async finish() {
            const [error, data] = await safe(uploadPromise);
            if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Upload error: code: ${error.Code} message: ${error.message}`); // sometimes message is null
            debug(`Upload finished. ${JSON.stringify(data)}`);
        }
    };
}
2015-08-26 16:14:51 -07:00
2025-08-02 01:46:29 +02:00
// Returns true when `remotePath` exists. Paths ending in '/' are treated as
// directories and checked by listing; anything else is checked with headObject.
async function exists(apiConfig, remotePath) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof remotePath, 'string');

    const s3 = createS3Client(apiConfig, { retryStrategy: null });
    const fullRemotePath = path.join(apiConfig.prefix, remotePath);

    if (fullRemotePath.endsWith('/')) { // list dir contents
        const listParams = {
            Bucket: apiConfig.bucket,
            Prefix: fullRemotePath,
            MaxKeys: 1
        };

        const [error, listData] = await safe(s3.listObjectsV2(listParams));
        if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error listing objects ${fullRemotePath}. ${formatError(error)}`);

        return listData.KeyCount !== 0 || listData.Contents.length !== 0;
    }

    // check for file
    const params = {
        Bucket: apiConfig.bucket,
        Key: fullRemotePath
    };

    const [error, response] = await safe(s3.headObject(params));
    if (error && S3_NOT_FOUND(error)) return false;
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error headObject ${fullRemotePath}. ${formatError(error)}`);
    // sanity check the response shape to catch endpoints that are not actually S3
    if (!response || typeof response.Metadata !== 'object') throw new BoxError(BoxError.EXTERNAL_ERROR, 'not a s3 endpoint');

    return true;
}
2022-04-29 18:23:56 -07:00
// Download the object in small parts. By downloading small parts, we reduce the chance of sporadic network errors when downloading large objects
// We can retry each part individually, but we haven't had the need for this yet
class S3MultipartDownloadStream extends Readable {
    // s3: client. params: { Bucket, Key }. options: Readable options plus optional blockSize (bytes)
    constructor(s3, params, options) {
        super(options);
        this._s3 = s3;
        this._params = params;
        this._readSize = 0; // bytes pushed downstream so far
        this._fileSize = -1; // unknown until _fetchSize() runs on the first _read()
        this._path = params.Bucket + '/' + params.Key; // used only in error/debug messages
        this._blockSize = options.blockSize || 64 * 1048576; // default 64 MiB per ranged GET
    }
    // signal EOF to the consumer
    _done() {
        this._readSize = 0;
        this.push(null); // EOF
    }
    // map s3 errors to BoxError and destroy the stream
    _handleError(error) {
        if (S3_NOT_FOUND(error)) {
            this.destroy(new BoxError(BoxError.NOT_FOUND, `Backup not found: ${this._path}`));
        } else {
            debug(`download: ${this._path} s3 stream error. %o`, error);
            this.destroy(new BoxError(BoxError.EXTERNAL_ERROR, `Error multipartDownload ${this._path}. ${formatError(error)}`));
        }
    }
    // fetch `length` bytes starting at `offset` via a ranged GET and push them downstream
    async _downloadRange(offset, length) {
        const params = Object.assign({}, this._params);
        const lastPos = offset + length - 1; // HTTP Range end is inclusive
        const range = `bytes=${offset}-${lastPos}`;
        params['Range'] = range;
        const [error, data] = await safe(this._s3.getObject(params));
        if (error) return this._handleError(error);

        const contentLength = parseInt(data.ContentLength, 10); // should be same as length

        if (contentLength > 0) {
            this._readSize += contentLength;
            const body = await consumers.buffer(data.Body); // data.Body.transformToString('binary') also works
            this.push(body);
        } else {
            this._done();
        }
    }
    // compute the size of the next block from the remaining bytes and start its download
    _nextDownload() {
        let len = 0;
        if (this._readSize + this._blockSize < this._fileSize) {
            len = this._blockSize;
        } else {
            len = this._fileSize - this._readSize; // final (possibly short) block
        }
        this._downloadRange(this._readSize, len);
    }
    // HEAD the object to learn its total size, then kick off the first range download
    async _fetchSize() {
        const [error, data] = await safe(this._s3.headObject(this._params));
        if (error) return this._handleError(error);

        const length = parseInt(data.ContentLength, 10);

        if (length > 0) {
            this._fileSize = length;
            this._nextDownload();
        } else {
            this._done(); // empty object: end immediately
        }
    }
    _read() { // reimp
        if (this._readSize === this._fileSize) return this._done();
        if (this._readSize === 0) return this._fetchSize();
        this._nextDownload();
    }
}
2025-08-02 01:46:29 +02:00
// Returns a Readable that streams the object at `remotePath` in 64 MiB ranged chunks
async function download(apiConfig, remotePath) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof remotePath, 'string');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });

    const params = {
        Bucket: apiConfig.bucket,
        Key: path.join(apiConfig.prefix, remotePath)
    };

    return new S3MultipartDownloadStream(s3, params, { blockSize: 64 * 1024 * 1024 });
}
2025-08-02 01:46:29 +02:00
// Lists up to batchSize objects under remotePath. Returned entry paths are relative
// to apiConfig.prefix. Pass the returned marker back in to continue; marker is null
// when there is nothing more to list.
async function listDir(apiConfig, remotePath, batchSize, marker) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert.strictEqual(typeof batchSize, 'number');
    assert(typeof marker !== 'undefined');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });
    const fullRemotePath = path.join(apiConfig.prefix, remotePath);

    const listParams = {
        Bucket: apiConfig.bucket,
        Prefix: fullRemotePath + '/', // ensure we list contents of the directory and not match other filenames with prefix
        MaxKeys: batchSize
    };
    if (marker) listParams.ContinuationToken = marker;

    const [error, listData] = await safe(s3.listObjectsV2(listParams));
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error listing objects in ${fullRemotePath}. ${formatError(error)}`);
    if (listData.KeyCount === 0 || listData.Contents.length === 0) return { entries: [], marker: null }; // no more

    const entries = listData.Contents.map((c) => ({ path: path.relative(apiConfig.prefix, c.Key), size: c.Size }));

    return { entries, marker: listData.IsTruncated ? listData.NextContinuationToken : null };
}
2018-02-03 22:00:33 -08:00
// https://github.com/aws/aws-sdk-js/blob/2b6bcbdec1f274fe931640c1b61ece999aae7a19/lib/util.js#L41
// https://github.com/GeorgePhillips/node-s3-url-encode/blob/master/index.js
// See aws-sdk-js/issues/1302
function encodeCopySource(bucket, path) {
    // AWS percent-encodes some extra non-standard characters in a URI
    const percentEncode = (ch) => '%' + ch.charCodeAt(0).toString(16).toUpperCase();
    const output = encodeURI(path).replace(/[+!"#$@&'()*+,:;=?@]/g, percentEncode);
    // the slash at the beginning is optional
    return `/${bucket}/${output}`;
}
2025-08-25 23:45:14 +02:00
// Copies fullFromPath to fullToPath (both are absolute keys, i.e. already include the prefix).
// Small files use a single copyObject; large files use a multipart uploadPartCopy sequence.
// Throws BoxError.NOT_FOUND when the source is missing, BoxError.EXTERNAL_ERROR otherwise.
async function copyInternal(apiConfig, fullFromPath, fullToPath, fileSize, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof fullFromPath, 'string');
    assert.strictEqual(typeof fullToPath, 'string');
    assert.strictEqual(typeof fileSize, 'number');
    assert.strictEqual(typeof progressCallback, 'function');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY }); // https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html

    // converts an s3 error into a BoxError and throws it; no-op when error is falsy
    function throwError(error) {
        if (error) debug(`copy: s3 copy error when copying ${fullFromPath}: ${error}`);

        if (error && S3_NOT_FOUND(error)) throw new BoxError(BoxError.NOT_FOUND, `Old backup not found: ${fullFromPath}`);
        if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error copying ${fullFromPath} (${fileSize} bytes): ${error.Code || ''} ${error}`);
    }

    const copyParams = {
        Bucket: apiConfig.bucket,
        Key: fullToPath
    };

    // S3 copyObject has a file size limit of 5GB so if we have larger files, we do a multipart copy
    const provider = apiConfig._provider;
    const largeFileLimit = (provider === 'vultr-objectstorage' || provider === 'exoscale-sos' || provider === 'backblaze-b2' || provider === 'digitalocean-spaces') ? 1024 * 1024 * 1024 : 3 * 1024 * 1024 * 1024;

    if (fileSize < largeFileLimit) { // simple single-request copy
        progressCallback({ message: `Copying ${fullFromPath}` });

        copyParams.CopySource = encodeCopySource(apiConfig.bucket, fullFromPath);
        const [copyError] = await safe(s3.copyObject(copyParams));
        if (copyError) return throwError(copyError);
        return;
    }

    progressCallback({ message: `Copying (multipart) ${fullFromPath}` });

    const [createMultipartError, multipart] = await safe(s3.createMultipartUpload(copyParams));
    if (createMultipartError) return throwError(createMultipartError);

    // Exoscale (96M) was suggested by exoscale. 1GB for others is arbitrary size
    const chunkSize = provider === 'exoscale-sos' ? 96 * 1024 * 1024 : 1024 * 1024 * 1024;
    const uploadId = multipart.UploadId;
    const uploadedParts = [], ranges = [];

    // split [0, fileSize) into chunkSize-wide ranges; the final range carries the remainder
    let cur = 0;
    while (cur + chunkSize < fileSize) {
        ranges.push({ startBytes: cur, endBytes: cur + chunkSize - 1 });
        cur += chunkSize;
    }
    ranges.push({ startBytes: cur, endBytes: fileSize - 1 });

    // copy up to 3 parts in parallel; part numbers are 1-based
    const [copyError] = await safe(async.eachOfLimit(ranges, 3, async function copyChunk(range, index) {
        const partCopyParams = {
            Bucket: apiConfig.bucket,
            Key: fullToPath,
            CopySource: encodeCopySource(apiConfig.bucket, fullFromPath), // See aws-sdk-js/issues/1302
            CopySourceRange: 'bytes=' + range.startBytes + '-' + range.endBytes,
            PartNumber: index + 1,
            UploadId: uploadId
        };

        progressCallback({ message: `Copying part ${partCopyParams.PartNumber} - ${partCopyParams.CopySource} ${partCopyParams.CopySourceRange}` });

        const part = await s3.uploadPartCopy(partCopyParams);
        progressCallback({ message: `Copied part ${partCopyParams.PartNumber} - Etag: ${part.CopyPartResult.ETag}` });

        if (!part.CopyPartResult.ETag) throw new Error('Multi-part copy is broken or not implemented by the S3 storage provider');

        uploadedParts[index] = { ETag: part.CopyPartResult.ETag, PartNumber: partCopyParams.PartNumber };
    }));

    if (copyError) {
        // abort so the provider does not keep (and bill for) the incomplete multipart upload
        const abortParams = {
            Bucket: apiConfig.bucket,
            Key: fullToPath,
            UploadId: uploadId
        };
        progressCallback({ message: `Aborting multipart copy of ${fullFromPath}` });
        await safe(s3.abortMultipartUpload(abortParams), { debug }); // ignore any abort errors
        return throwError(copyError);
    }

    const completeMultipartParams = {
        Bucket: apiConfig.bucket,
        Key: fullToPath,
        MultipartUpload: { Parts: uploadedParts },
        UploadId: uploadId
    };
    progressCallback({ message: `Finishing multipart copy - ${completeMultipartParams.Key}` });
    const [completeMultipartError] = await safe(s3.completeMultipartUpload(completeMultipartParams));
    if (completeMultipartError) return throwError(completeMultipartError);
}
2025-08-02 01:46:29 +02:00
// Copies a single object from fromPath to toPath (both relative to the prefix).
// Throws BoxError.NOT_FOUND if the source does not exist.
async function copy(apiConfig, fromPath, toPath, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof fromPath, 'string');
    assert.strictEqual(typeof toPath, 'string');
    assert.strictEqual(typeof progressCallback, 'function');

    const fullFromPath = path.join(apiConfig.prefix, fromPath);
    const fullToPath = path.join(apiConfig.prefix, toPath);

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY }); // https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html

    // headObject gives us the size, which determines single vs multipart copy
    const [error, data] = await safe(s3.headObject({ Bucket: apiConfig.bucket, Key: fullFromPath }));
    if (error && S3_NOT_FOUND(error)) throw new BoxError(BoxError.NOT_FOUND, `Path ${fromPath} not found`);
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error headObject ${fromPath}. ${formatError(error)}`);

    return await copyInternal(apiConfig, fullFromPath, fullToPath, data.ContentLength, progressCallback);
}
// Copies every object under fromPath to the corresponding key under toPath,
// listing in batches of 1000 and copying objects concurrently.
async function copyDir(apiConfig, fromPath, toPath, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof fromPath, 'string');
    assert.strictEqual(typeof toPath, 'string');
    assert.strictEqual(typeof progressCallback, 'function');

    let total = 0;
    const concurrency = apiConfig.limits?.copyConcurrency || (apiConfig._provider === 's3' ? 500 : 10);
    progressCallback({ message: `Copying ${fromPath} to ${toPath} with concurrency of ${concurrency}` });

    let marker = null;
    for (;;) {
        const batch = await listDir(apiConfig, fromPath, 1000, marker); // returned entries are relative to prefix
        total += batch.entries.length;
        progressCallback({ message: `Copying files from ${total - batch.entries.length} - ${total}` });

        await async.eachLimit(batch.entries, concurrency, async (entry) => {
            const fullFromPath = path.join(apiConfig.prefix, entry.path);
            const fullToPath = path.join(apiConfig.prefix, toPath, path.relative(fromPath, entry.path));
            await copyInternal(apiConfig, fullFromPath, fullToPath, entry.size, progressCallback);
        });

        if (!batch.marker) break;
        marker = batch.marker;
    }

    progressCallback({ message: `Copied ${total} files` });
}
2016-10-10 15:04:28 +02:00
2025-08-02 01:46:29 +02:00
// Removes a single object. Succeeds even when the key does not exist.
async function remove(apiConfig, remotePath) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof remotePath, 'string');

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });
    const fullRemotePath = path.join(apiConfig.prefix, remotePath);

    const deleteParams = {
        Bucket: apiConfig.bucket,
        Key: fullRemotePath
    };

    // deleteObject does not return error if key is not found
    const [error] = await safe(s3.deleteObject(deleteParams));
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to remove ${fullRemotePath}. ${formatError(error)}`);
}
2022-04-15 09:25:54 -05:00
// Splits `array` into consecutive slices of at most `size` elements
function chunk(array, size) {
    assert(Array.isArray(array));
    assert.strictEqual(typeof size, 'number');

    if (array.length === 0) return [];

    const result = [];
    for (let start = 0; start < array.length; start += size) {
        result.push(array.slice(start, start + size));
    }
    return result;
}
2025-08-02 01:46:29 +02:00
// Removes every object under remotePathPrefix using batched DeleteObjects requests.
// Lists 1000 keys at a time and deletes them in provider-sized chunks.
async function removeDir(apiConfig, remotePathPrefix, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof remotePathPrefix, 'string');
    assert.strictEqual(typeof progressCallback, 'function');

    // only use this client for DeleteObjects call. It forces md5 checksum and for anything else, it might crash
    const deleteObjectsS3Client = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY, deleteObjects: true });

    let total = 0;
    let marker = null;
    while (true) {
        const batch = await listDir(apiConfig, remotePathPrefix, 1000, marker); // returns entries relative to (root) prefix

        const entries = batch.entries;
        total += entries.length;

        const chunkSize = apiConfig.limits?.deleteConcurrency || (apiConfig._provider !== 'digitalocean-spaces' ? 1000 : 100); // throttle objects in each request
        const chunks = chunk(entries, chunkSize);

        // delete one chunk at a time; each chunk removes up to chunkSize objects in a single request
        await async.eachSeries(chunks, async function deleteFiles(objects) {
            const deleteParams = {
                Bucket: apiConfig.bucket,
                Delete: {
                    Objects: objects.map(function (o) { return { Key: path.join(apiConfig.prefix, o.path) }; })
                }
            };

            const fullFirstPath = path.join(apiConfig.prefix, objects[0].path), fullLastPath = path.join(apiConfig.prefix, objects[objects.length - 1].path);
            progressCallback({ message: `Removing ${objects.length} files from ${fullFirstPath} to ${fullLastPath}` });

            // deleteObjects does not return error if key is not found
            const [error] = await safe(deleteObjectsS3Client.deleteObjects(deleteParams));
            if (error) {
                progressCallback({ message: `Unable to remove from ${fullFirstPath} to ${fullLastPath} ${error.message || error.Code}` });
                throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to remove from ${fullFirstPath} to ${fullLastPath}. error: ${error.message}`);
            }
        });
        if (!batch.marker) break;
        marker = batch.marker;
    }

    progressCallback({ message: `Removed ${total} files` });
}
2016-10-11 11:36:25 +02:00
2025-02-13 11:08:00 +01:00
// often, the AbortIncompleteMultipartUpload lifecycle rule is not added to the bucket resulting in large bucket sizes over time
async function cleanup(apiConfig, progressCallback) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const THREE_DAYS_MS = 3 * 24 * 60 * 60 * 1000;

    const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY });
    const uploads = await s3.listMultipartUploads({ Bucket: apiConfig.bucket, Prefix: apiConfig.prefix });
    progressCallback({ message: `Cleaning up any aborted multi-part uploads. count: ${uploads.Uploads?.length || 0} truncated: ${uploads.IsTruncated}` });
    if (!uploads.Uploads) return;

    for (const upload of uploads.Uploads) {
        const age = Date.now() - new Date(upload.Initiated);
        if (age < THREE_DAYS_MS) continue; // only abort uploads started more than 3 days ago
        progressCallback({ message: `Cleaning up multi-part upload uploadId: ${upload.UploadId} key: ${upload.Key}` });
        await safe(s3.abortMultipartUpload({ Bucket: apiConfig.bucket, Key: upload.Key, UploadId: upload.UploadId }), { debug }); // ignore error
    }
}
2025-08-01 18:55:04 +02:00
// validates field types and then performs a put/list/delete roundtrip against the bucket to
// verify credentials and connectivity. returns the normalized config (whitelisted fields only)
// tagged with the provider. throws BoxError.BAD_FIELD on invalid fields and
// BoxError.EXTERNAL_ERROR when any S3 operation fails.
async function verifyConfig({ id, provider, config }) {
    assert.strictEqual(typeof id, 'string');
    assert.strictEqual(typeof provider, 'string');
    assert.strictEqual(typeof config, 'object');

    if (typeof config.accessKeyId !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'accessKeyId must be a string');
    if (typeof config.secretAccessKey !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'secretAccessKey must be a string');

    if (typeof config.bucket !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'bucket must be a string');
    // the node module seems to incorrectly accept bucket name with '/'
    if (config.bucket.includes('/')) throw new BoxError(BoxError.BAD_FIELD, 'bucket name cannot contain "/"');

    // names must be lowercase and start with a letter or number. can contain dashes
    if (config.bucket.includes('_') || config.bucket.match(/[A-Z]/)) throw new BoxError(BoxError.BAD_FIELD, 'bucket name cannot contain "_" or capitals');

    if (typeof config.prefix !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'prefix must be a string');
    if ('signatureVersion' in config && typeof config.signatureVersion !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'signatureVersion must be a string');
    if ('endpoint' in config && typeof config.endpoint !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'endpoint must be a string');
    if ('region' in config && typeof config.region !== 'string') throw new BoxError(BoxError.BAD_FIELD, 'region must be a string');

    if ('acceptSelfSignedCerts' in config && typeof config.acceptSelfSignedCerts !== 'boolean') throw new BoxError(BoxError.BAD_FIELD, 'acceptSelfSignedCerts must be a boolean');
    if ('s3ForcePathStyle' in config && typeof config.s3ForcePathStyle !== 'boolean') throw new BoxError(BoxError.BAD_FIELD, 's3ForcePathStyle must be a boolean');

    const s3 = createS3Client(config, {});

    // roundtrip a test file under the snapshot prefix to verify write/list/delete permissions
    const putParams = {
        Bucket: config.bucket,
        Key: path.join(config.prefix, 'snapshot/cloudron-testfile'),
        Body: 'testcontent'
    };

    const [putError] = await safe(s3.putObject(putParams));
    if (putError) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error put object cloudron-testfile. ${formatError(putError)}`);

    const listParams = {
        Bucket: config.bucket,
        Prefix: path.join(config.prefix, 'snapshot'),
        MaxKeys: 1
    };

    const [listError] = await safe(s3.listObjectsV2(listParams));
    if (listError) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error listing objects. ${formatError(listError)}`);

    const delParams = {
        Bucket: config.bucket,
        Key: path.join(config.prefix, 'snapshot/cloudron-testfile')
    };

    const [delError] = await safe(s3.deleteObject(delParams));
    if (delError) throw new BoxError(BoxError.EXTERNAL_ERROR, `Error del object cloudron-testfile. ${formatError(delError)}`);

    // whitelist of persisted fields. 'region' was validated above but previously missing here,
    // silently dropping a configured region from the stored config
    const newConfig = _.pick(config, [ 'accessKeyId', 'secretAccessKey', 'bucket', 'prefix', 'signatureVersion', 'acceptSelfSignedCerts', 'endpoint', 'region', 's3ForcePathStyle' ]);

    return { _provider: provider, ...newConfig };
}
2019-02-09 18:08:10 -08:00
2025-08-01 14:54:32 +02:00
/**
 * One-time backend initialization hook. S3 requires no setup work; this only
 * validates the argument so misconfigured callers fail early.
 *
 * @param {object} apiConfig - the storage backend configuration
 */
async function setup(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');
}
/**
 * Backend removal hook. S3 requires no teardown work; this only validates the
 * argument so misconfigured callers fail early.
 *
 * @param {object} apiConfig - the storage backend configuration
 */
async function teardown(apiConfig) {
    assert.strictEqual(typeof apiConfig, 'object');
}
2019-02-09 18:08:10 -08:00
/**
 * Strips secrets and internal bookkeeping from a config before it is exposed
 * (e.g. returned over the API). Mutates and returns the same object.
 *
 * @param {object} apiConfig - config to sanitize in place
 * @returns {object} the same (now sanitized) config object
 */
function removePrivateFields(apiConfig) {
    for (const field of ['secretAccessKey', '_provider']) delete apiConfig[field];
    return apiConfig;
}
/**
 * Restores private fields into a sanitized config submitted by a client:
 * keeps a client-supplied secretAccessKey if present (even if undefined),
 * otherwise carries the stored one over. The provider tag is always taken
 * from the stored config. Mutates newConfig.
 *
 * @param {object} newConfig - incoming (sanitized) config, mutated in place
 * @param {object} currentConfig - currently stored config holding the private fields
 */
function injectPrivateFields(newConfig, currentConfig) {
    const hasOwnSecret = Object.hasOwn(newConfig, 'secretAccessKey');
    if (!hasOwnSecret) newConfig.secretAccessKey = currentConfig.secretAccessKey;
    newConfig._provider = currentConfig._provider;
}
2025-10-08 15:44:58 +02:00