'use strict';

exports = module.exports = {
    download,
    upload,
    getFileExtension,

    // underscore-prefixed entries are internal, presumably exposed for tests — confirm before relying on them externally
    _saveFsMetadata: saveFsMetadata,
    _restoreFsMetadata: restoreFsMetadata
};

const assert = require('assert');
const async = require('async');
const backupTargets = require('../backuptargets.js');
const BoxError = require('../boxerror.js');
const crypto = require('crypto');
const DataLayout = require('../datalayout.js');
const debug = require('debug')('box:backupformat/rsync');
const fs = require('fs');
const hush = require('../hush.js');
const { DecryptStream, EncryptStream } = require('../hush.js');
const path = require('path');
const paths = require('../paths.js');
const ProgressStream = require('../progress-stream.js');
const promiseRetry = require('../promise-retry.js');
const { Readable } = require('stream');
const safe = require('safetydance');
const shell = require('../shell.js')('backupformat/rsync');
const stream = require('stream/promises');
const syncer = require('../syncer.js');
2022-04-28 18:43:14 -07:00
2024-07-08 14:56:55 +02:00
// Uploads a single local file through an (optional) encryption stream into the
// provided uploader, tracking progress and computing a sha256 of the uploaded bytes.
// Returns { stats, integrity: { size, sha256 } } on success, or undefined when the
// source file disappeared before it could be opened — callers must handle undefined.
async function addFile(sourceFile, encryption, uploader, progressCallback) {
    assert.strictEqual(typeof sourceFile, 'string');
    assert.strictEqual(typeof encryption, 'object');
    assert.strictEqual(typeof uploader, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    // make sure file can be opened for reading before we start the pipeline. otherwise, we end up with
    // destinations dirs/file which are owned by root (this process id) and cannot be copied (run as normal user)
    const [openError, sourceHandle] = await safe(fs.promises.open(sourceFile, 'r'));
    if (openError) {
        debug(`addFile: ignoring disappeared file: ${sourceFile}`);
        return;
    }

    // FileHandle#createReadStream() takes only an options object; passing the path again
    // made the options argument be silently ignored
    const sourceStream = sourceHandle.createReadStream({ autoClose: true });

    const ps = new ProgressStream({ interval: 10000 }); // display a progress every 10 seconds
    ps.on('progress', function (progress) {
        const transferred = Math.round(progress.transferred / 1024 / 1024), speed = Math.round(progress.speed / 1024 / 1024);
        if (!transferred && !speed) return progressCallback({ message: `Uploading ${sourceFile}` }); // 0M@0MBps looks wrong
        progressCallback({ message: `Uploading ${sourceFile}: ${transferred}M@${speed}MBps` });
    });

    const hash = crypto.createHash('sha256'); // hash of what actually went over the wire

    // note: the pipeline promise must only be wrapped in safe() once. wrapping it here
    // and again at the await resolved to [null, [error, result]] and dropped all errors
    let pipelinePromise;
    if (encryption) {
        const encryptStream = new EncryptStream(encryption);
        pipelinePromise = stream.pipeline(sourceStream, encryptStream, ps, hash, uploader.stream);
    } else {
        pipelinePromise = stream.pipeline(sourceStream, ps, hash, uploader.stream);
    }

    const [error] = await safe(pipelinePromise);
    if (error && !error.message.includes('ENOENT')) throw new BoxError(BoxError.EXTERNAL_ERROR, `addFile pipeline error: ${error.message}`); // ignore error if file disappears

    // debug(`addFile: pipeline finished: ${JSON.stringify(ps.stats())}`);
    await uploader.finish();

    return {
        stats: ps.stats(),
        integrity: { size: ps.stats().transferred, sha256: hash.digest('hex') }
    };
}
2025-07-25 13:49:37 +02:00
// Synchronizes the local dataLayout to remotePath on the backup target: computes
// delete/add queues against a per-target cache file, processes them with
// provider-appropriate concurrency and records per-file integrity data.
// Returns { stats, integrity } where integrity is a lexicographically sorted
// array of [destPath, { size, sha256 }] entries.
async function sync(backupTarget, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    // the number here has to take into account the s3.upload partSize (which is 10MB). So 20=200MB
    const concurrency = backupTarget.limits?.syncConcurrency || (backupTarget.provider === 's3' ? 20 : 10);

    const cacheFile = path.join(paths.BACKUP_INFO_DIR, backupTarget.id, `${dataLayout.getBasename()}.sync.cache`);

    const { delQueue, addQueue, integrityMap } = await syncer.sync(dataLayout, cacheFile);
    debug(`sync: processing ${delQueue.length} deletes and ${addQueue.length} additions`);

    const aggregatedStats = { added: addQueue.length, deleted: delQueue.length, size: 0, startTime: Date.now() };

    // closes over backupTarget/remotePath/dataLayout/progressCallback; takes only the change
    async function processSyncerChange(change) {
        debug('sync: processing task: %j', change);

        // the empty task.path is special to signify the directory
        const destPath = change.path && backupTarget.encryption?.encryptedFilenames ? hush.encryptFilePath(change.path, backupTarget.encryption) : change.path;
        const fullPath = path.join(remotePath, destPath);

        if (change.operation === 'removedir') {
            debug(`Removing directory ${fullPath}`);
            await backupTargets.storageApi(backupTarget).removeDir(backupTarget.config, fullPath, progressCallback);
        } else if (change.operation === 'remove') {
            debug(`Removing ${fullPath}`);
            await backupTargets.storageApi(backupTarget).remove(backupTarget.config, fullPath);
        } else if (change.operation === 'add') {
            await promiseRetry({ times: 5, interval: 20000, debug }, async (retryCount) => {
                progressCallback({ message: `Adding ${change.path}` + (retryCount > 1 ? ` (Try ${retryCount})` : '') });
                debug(`Adding ${change.path} position ${change.position} try ${retryCount}`);
                const uploader = await backupTargets.storageApi(backupTarget).upload(backupTarget.config, fullPath);
                const result = await addFile(dataLayout.toLocalPath('./' + change.path), backupTarget.encryption, uploader, progressCallback);
                if (!result) return; // addFile returns undefined when the source file disappeared; nothing to record
                integrityMap.set(destPath, result.integrity);
                aggregatedStats.size += result.stats.size ?? result.stats.transferred ?? 0; // fall back to avoid NaN when stats carries 'transferred' instead of 'size'
            });
        }
    }

    const [delError] = await safe(async.eachLimit(delQueue, concurrency, processSyncerChange));
    debug('sync: done processing deletes. error: %o', delError);
    if (delError) throw delError;

    const [addError] = await safe(async.eachLimit(addQueue, concurrency, processSyncerChange));
    debug('sync: done processing adds. error: %o', addError);
    if (addError) throw addError;

    await syncer.finalize(cacheFile);

    return {
        stats: aggregatedStats,
        // for readability, order the entries. the comparator must return a number; a boolean never sorts
        integrity: [...integrityMap.entries()].sort(([a], [b]) => a < b ? -1 : (a > b ? 1 : 0))
    };
}
// Collects filesystem metadata that per-file upload cannot represent — empty
// directories, executable files and symlinks — and writes it to metadataFile as JSON.
// This is not part of 'snapshotting' because we need root access to traverse.
async function saveFsMetadata(dataLayout, metadataFile) {
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof metadataFile, 'string');

    // Runs `find lp <findArgs>` and returns the matched paths as an array.
    // we assume small number of files. spawnSync will raise a ENOBUFS error after maxBuffer,
    // so maxLines caps the output and we bail with a descriptive error when hit.
    async function findPaths(lp, findArgs, maxLines, description) {
        const [error, output] = await safe(shell.spawn('find', [lp, ...findArgs], { encoding: 'utf8', maxLines }));
        if (error && error.stdoutLineCount >= maxLines) throw new BoxError(BoxError.FS_ERROR, `Too many ${description}. Run "find ${lp} ${findArgs.join(' ')}" to investigate`);
        if (error) throw error;
        return output.trim().length ? output.trim().split('\n') : [];
    }

    // contains paths prefixed with './'
    const metadata = {
        emptyDirs: [],
        execFiles: [],
        symlinks: []
    };

    for (const lp of dataLayout.localPaths()) {
        const emptyDirs = await findPaths(lp, ['-type', 'd', '-empty'], 50000, 'empty directories');
        metadata.emptyDirs = metadata.emptyDirs.concat(emptyDirs.map((ed) => dataLayout.toRemotePath(ed)));

        const execFiles = await findPaths(lp, ['-type', 'f', '-executable'], 20000, 'executable files');
        metadata.execFiles = metadata.execFiles.concat(execFiles.map((ef) => dataLayout.toRemotePath(ef)));

        const symlinkFiles = await findPaths(lp, ['-type', 'l'], 20000, 'symlinks');
        metadata.symlinks = metadata.symlinks.concat(symlinkFiles.map((sl) => {
            const target = safe.fs.readlinkSync(sl); // may be null if the link vanished; restore skips falsy targets
            return { path: dataLayout.toRemotePath(sl), target };
        }));
    }

    if (!safe.fs.writeFileSync(metadataFile, JSON.stringify(metadata, null, 2))) throw new BoxError(BoxError.FS_ERROR, `Error writing fs metadata: ${safe.error.message}`);
}
// Re-applies the filesystem metadata written by saveFsMetadata(): recreates empty
// directories, restores executable bits and recreates symlinks under dataLayout.
async function restoreFsMetadata(dataLayout, metadataFile) {
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof metadataFile, 'string');

    debug(`Recreating empty directories in ${dataLayout.toString()}`);

    const metadataJson = safe.fs.readFileSync(metadataFile, 'utf8');
    if (metadataJson === null) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Error loading fsmetadata.json:' + safe.error.message);
    const metadata = safe.JSON.parse(metadataJson);
    if (metadata === null) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Error parsing fsmetadata.json:' + safe.error.message);

    // default missing fields to [] like symlinks below — older metadata files may not carry every field
    for (const emptyDir of (metadata.emptyDirs || [])) {
        const [mkdirError] = await safe(fs.promises.mkdir(dataLayout.toLocalPath(emptyDir), { recursive: true }));
        if (mkdirError) throw new BoxError(BoxError.FS_ERROR, `unable to create path: ${mkdirError.message}`);
    }

    for (const execFile of (metadata.execFiles || [])) {
        const [chmodError] = await safe(fs.promises.chmod(dataLayout.toLocalPath(execFile), 0o755));
        if (chmodError) throw new BoxError(BoxError.FS_ERROR, `unable to chmod: ${chmodError.message}`);
    }

    for (const symlink of (metadata.symlinks || [])) {
        if (!symlink.target) continue; // saveFsMetadata records a null target when readlink failed

        // the path may not exist if we had a directory full of symlinks
        const [mkdirError] = await safe(fs.promises.mkdir(path.dirname(dataLayout.toLocalPath(symlink.path)), { recursive: true }));
        if (mkdirError) throw new BoxError(BoxError.FS_ERROR, `unable to symlink (mkdir): ${mkdirError.message}`);

        const [symlinkError] = await safe(fs.promises.symlink(symlink.target, dataLayout.toLocalPath(symlink.path), 'file'));
        if (symlinkError) throw new BoxError(BoxError.FS_ERROR, `unable to symlink: ${symlinkError.message}`);
    }
}
2025-07-25 13:49:37 +02:00
// Downloads every remote object under backupFilePath into the local dataLayout,
// decrypting file names and/or contents when the backup target is configured for it.
// Listing is paginated via markers; entries within a page download concurrently.
async function downloadDir(backupTarget, backupFilePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    const encryptedFilenames = backupTarget.encryption?.encryptedFilenames || false;
    debug(`downloadDir: ${backupFilePath} to ${dataLayout.toString()}. encryption filenames: ${encryptedFilenames} content: ${!!backupTarget.encryption}`);

    // Downloads a single listing entry, retrying the transfer up to 3 times.
    async function downloadFile(entry) {
        // remote path relative to the backup root; this (possibly encrypted) name maps to the local destination
        let relativePath = path.relative(backupFilePath, entry.path);
        if (encryptedFilenames) {
            const { error, result } = hush.decryptFilePath(relativePath, backupTarget.encryption);
            if (error) throw new BoxError(BoxError.CRYPTO_ERROR, 'Unable to decrypt file');
            relativePath = result;
        }
        const destFilePath = dataLayout.toLocalPath('./' + relativePath);

        // ensure the destination directory exists before streaming into it
        const [mkdirError] = await safe(fs.promises.mkdir(path.dirname(destFilePath), { recursive: true }));
        if (mkdirError) throw new BoxError(BoxError.FS_ERROR, mkdirError.message);

        await promiseRetry({ times: 3, interval: 20000 }, async function () {
            const [downloadError, sourceStream] = await safe(backupTargets.storageApi(backupTarget).download(backupTarget.config, entry.path));
            if (downloadError) {
                progressCallback({ message: `Download ${entry.path} to ${destFilePath} errored: ${downloadError.message}` });
                throw downloadError; // rethrow so promiseRetry attempts again
            }

            const ps = new ProgressStream({ interval: 10000 }); // display a progress every 10 seconds
            ps.on('progress', function (progress) {
                const transferred = Math.round(progress.transferred / 1024 / 1024), speed = Math.round(progress.speed / 1024 / 1024);
                if (!transferred && !speed) return progressCallback({ message: `Downloading ${entry.path}` }); // 0M@0MBps looks wrong
                progressCallback({ message: `Downloading ${entry.path}: ${transferred}M@${speed}MBps` });
            });

            // pipeline: source -> progress [-> decrypt] -> local file
            // note: streams are recreated on every retry attempt since a pipeline consumes them
            const destStream = fs.createWriteStream(destFilePath);
            const streams = [sourceStream, ps];
            if (backupTarget.encryption) {
                const decryptStream = new DecryptStream(backupTarget.encryption);
                streams.push(decryptStream);
            }
            streams.push(destStream);

            progressCallback({ message: `Downloading ${entry.path} to ${destFilePath}` });

            const [pipelineError] = await safe(stream.pipeline(streams));
            if (pipelineError) {
                progressCallback({ message: `Download error ${entry.path} to ${destFilePath}: ${pipelineError.message}` });
                throw pipelineError; // rethrow so promiseRetry attempts again
            }

            progressCallback({ message: `Download finished ${entry.path} to ${destFilePath}` });
        });
    }

    // https://www.digitalocean.com/community/questions/rate-limiting-on-spaces?answer=40441
    const concurrency = backupTarget.limits?.downloadConcurrency || (backupTarget.provider === 's3' ? 30 : 10);

    let marker = null;
    while (true) {
        const batch = await backupTargets.storageApi(backupTarget).listDir(backupTarget.config, backupFilePath, marker === null ? 1 : 1000, marker); // try with one file first. if that works out, we continue faster
        await async.eachLimit(batch.entries, concurrency, downloadFile);
        if (!batch.marker) break;
        marker = batch.marker;
    }
}
2025-07-25 13:49:37 +02:00
// Restores a full backup: fetches everything under remotePath into the local
// dataLayout, then re-applies the saved filesystem metadata (empty dirs,
// executable bits, symlinks) from fsmetadata.json.
async function download(backupTarget, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    debug(`download: Downloading ${remotePath} to ${dataLayout.toString()}`);

    await downloadDir(backupTarget, remotePath, dataLayout, progressCallback);

    const metadataFile = `${dataLayout.localRoot()}/fsmetadata.json`;
    await restoreFsMetadata(dataLayout, metadataFile);
}
2025-07-25 13:49:37 +02:00
// Creates a backup: saves filesystem metadata, syncs the dataLayout to remotePath,
// uploads an integrity manifest to `${remotePath}.checksum` and signs it.
// Returns { stats, integrity: { signature } }.
async function upload(backupTarget, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout'); // consistent with sync()/download()
    assert.strictEqual(typeof progressCallback, 'function');

    await saveFsMetadata(dataLayout, `${dataLayout.localRoot()}/fsmetadata.json`);

    const { stats, integrity } = await sync(backupTarget, remotePath, dataLayout, progressCallback);

    const integrityDataJsonString = JSON.stringify(Object.fromEntries(integrity), null, 2);

    const integrityDataStream = Readable.from(integrityDataJsonString);
    const integrityUploader = await backupTargets.storageApi(backupTarget).upload(backupTarget.config, `${remotePath}.checksum`);
    await stream.pipeline(integrityDataStream, integrityUploader.stream);
    await integrityUploader.finish();

    // crypto.sign() is synchronous without a callback and rejects plain strings -
    // the data must be a Buffer/TypedArray. A null algorithm means it is derived
    // from the key type (e.g. Ed25519).
    const signature = crypto.sign(null /* algorithm */, Buffer.from(integrityDataJsonString), backupTarget.integrityKeyPair.privateKey);

    return { stats, integrity: { signature } };
}
2025-08-01 22:58:19 +02:00
// The rsync format stores plain directory trees, so backup entries carry no file
// extension whether or not encryption is enabled. Returning the empty string also
// signals to the backup cleaner that it is dealing with directories.
function getFileExtension(encryption) {
    assert.strictEqual(typeof encryption, 'boolean');

    return '';
}