2022-04-28 18:43:14 -07:00
'use strict' ;
exports = module . exports = {
download ,
upload ,
_saveFsMetadata : saveFsMetadata ,
_restoreFsMetadata : restoreFsMetadata
} ;
// '../hush.js' was previously required three times (for DecryptStream, EncryptStream
// and the module itself); require it once and destructure the stream classes from it.
const assert = require('assert'),
    async = require('async'),
    backupTargets = require('../backuptargets.js'),
    BoxError = require('../boxerror.js'),
    DataLayout = require('../datalayout.js'),
    debug = require('debug')('box:backupformat/rsync'),
    fs = require('fs'),
    hush = require('../hush.js'),
    { DecryptStream, EncryptStream } = hush,
    path = require('path'),
    paths = require('../paths.js'),
    ProgressStream = require('../progress-stream.js'),
    promiseRetry = require('../promise-retry.js'),
    safe = require('safetydance'),
    shell = require('../shell.js')('backupformat/rsync'),
    stream = require('stream/promises'),
    syncer = require('../syncer.js');
// uploads a single local file through the uploader, optionally encrypting it on the fly.
// files that disappeared between enumeration and upload are silently skipped.
// throws BoxError.EXTERNAL_ERROR on pipeline failure.
async function addFile(sourceFile, encryption, uploader, progressCallback) {
    assert.strictEqual(typeof sourceFile, 'string');
    assert.strictEqual(typeof encryption, 'object'); // may be null (typeof null === 'object')
    assert.strictEqual(typeof uploader, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    // make sure file can be opened for reading before we start the pipeline. otherwise, we end up with
    // destinations dirs/file which are owned by root (this process id) and cannot be copied (run as normal user)
    const [ openError, sourceHandle ] = await safe(fs.promises.open(sourceFile, 'r'));
    if (openError) {
        debug(`addFile: ignoring disappeared file: ${sourceFile}`);
        return;
    }

    // FileHandle#createReadStream() only takes an options object; the path argument
    // (previously passed here) belongs to fs.createReadStream() and was ignored/invalid
    const sourceStream = sourceHandle.createReadStream({ autoClose: true });

    const ps = new ProgressStream({ interval: 10000 }); // display a progress every 10 seconds
    ps.on('progress', function (progress) {
        const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
        if (!transferred && !speed) return progressCallback({ message: `Uploading ${sourceFile}` }); // 0M@0MBps looks wrong
        progressCallback({ message: `Uploading ${sourceFile}: ${transferred}M@${speed}MBps` });
    });

    const streams = encryption
        ? [ sourceStream, new EncryptStream(encryption), ps, uploader.stream ]
        : [ sourceStream, ps, uploader.stream ];

    // do NOT wrap the pipeline promise in safe() twice: safe(safe(p)) always yields a
    // null outer error, which silently swallowed every pipeline failure before
    const [ error ] = await safe(stream.pipeline(streams));
    if (error) {
        if (error.message.includes('ENOENT')) return; // ignore error if file disappears mid-upload; skip finish() of the partial upload
        throw new BoxError(BoxError.EXTERNAL_ERROR, `addFile pipeline error: ${error.message}`); // was mislabeled "tarPack"
    }

    // debug(`addFile: pipeline finished: ${JSON.stringify(ps.stats())}`);

    await uploader.finish();
}
// applies one syncer change (directory removal, file removal or file addition)
// to the backup target. unknown operations are ignored.
async function processSyncerChange(change, backupTarget, remotePath, dataLayout, progressCallback) {
    debug('sync: processing task: %j', change);

    // the empty change.path is special to signify the directory itself
    const mustEncryptName = change.path && backupTarget.encryption?.encryptedFilenames;
    const remoteName = mustEncryptName ? hush.encryptFilePath(change.path, backupTarget.encryption) : change.path;
    const backupFilePath = path.join(backupTargets.getBackupFilePath(backupTarget, remotePath), remoteName);

    switch (change.operation) {
    case 'removedir':
        debug(`Removing directory ${backupFilePath}`);
        await backupTargets.storageApi(backupTarget).removeDir(backupTarget.config, backupFilePath, progressCallback);
        break;

    case 'remove':
        debug(`Removing ${backupFilePath}`);
        await backupTargets.storageApi(backupTarget).remove(backupTarget.config, backupFilePath);
        break;

    case 'add':
        // each upload gets a handful of retries with a generous backoff
        await promiseRetry({ times: 5, interval: 20000, debug }, async (retryCount) => {
            progressCallback({ message: `Adding ${change.path}` + (retryCount > 1 ? ` (Try ${retryCount})` : '') });
            debug(`Adding ${change.path} position ${change.position} try ${retryCount}`);
            const uploader = await backupTargets.storageApi(backupTarget).upload(backupTarget.config, backupFilePath);
            await addFile(dataLayout.toLocalPath('./' + change.path), backupTarget.encryption, uploader, progressCallback);
        });
        break;
    }
}
// diffs the data layout against the cached state and applies the resulting
// deletions and additions (in that order) to the backup target
async function sync(backupTarget, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    // the number here has to take into account the s3.upload partSize (which is 10MB). So 20=200MB
    const concurrency = backupTarget.limits?.syncConcurrency || (backupTarget.provider === 's3' ? 20 : 10);

    const cacheFile = path.join(paths.BACKUP_INFO_DIR, backupTarget.id, `${dataLayout.getBasename()}.sync.cache`);
    const changes = await syncer.sync(dataLayout, { cacheFile });

    debug(`sync: processing ${changes.delQueue.length} deletes and ${changes.addQueue.length} additions`);

    // both queues are processed with the same per-change handler
    const applyChange = async (change) => await processSyncerChange(change, backupTarget, remotePath, dataLayout, progressCallback);

    const [ delError ] = await safe(async.eachLimit(changes.delQueue, concurrency, applyChange));
    debug('sync: done processing deletes. error: %o', delError);
    if (delError) throw delError;

    const [ addError ] = await safe(async.eachLimit(changes.addQueue, concurrency, applyChange));
    debug('sync: done processing adds. error: %o', addError);
    if (addError) throw addError;

    await syncer.finalize(changes);
}
// this is not part of 'snapshotting' because we need root access to traverse
// collects filesystem metadata that plain per-file uploads cannot preserve
// (empty directories, executable bits, symlinks) and writes it as JSON to metadataFile.
// throws BoxError.FS_ERROR when a find overflows its line limit or the write fails.
async function saveFsMetadata(dataLayout, metadataFile) {
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof metadataFile, 'string');

    // runs find with the given predicate args and returns the matching paths.
    // we assume small number of files. spawnSync will raise a ENOBUFS error after maxBuffer
    async function findPaths(localPath, findArgs, maxLines, description) {
        const [ error, output ] = await safe(shell.spawn('find', [ localPath, ...findArgs ], { encoding: 'utf8', maxLines }));
        if (error && error.stdoutLineCount >= maxLines) throw new BoxError(BoxError.FS_ERROR, `Too many ${description}. Run "find ${localPath} ${findArgs.join(' ')}" to investigate`);
        if (error) throw error;
        return output.length ? output.trim().split('\n') : [];
    }

    // contains paths prefixed with './'
    const metadata = {
        emptyDirs: [],
        execFiles: [],
        symlinks: []
    };

    for (const lp of dataLayout.localPaths()) {
        const emptyDirs = await findPaths(lp, [ '-type', 'd', '-empty' ], 50000, 'empty directories');
        metadata.emptyDirs = metadata.emptyDirs.concat(emptyDirs.map((ed) => dataLayout.toRemotePath(ed)));

        const execFiles = await findPaths(lp, [ '-type', 'f', '-executable' ], 20000, 'executable files');
        metadata.execFiles = metadata.execFiles.concat(execFiles.map((ef) => dataLayout.toRemotePath(ef)));

        const symlinks = await findPaths(lp, [ '-type', 'l' ], 20000, 'symlinks');
        metadata.symlinks = metadata.symlinks.concat(symlinks.map((sl) => {
            const target = safe.fs.readlinkSync(sl); // falsy if the link vanished; restore skips those entries
            return { path: dataLayout.toRemotePath(sl), target };
        }));
    }

    if (!safe.fs.writeFileSync(metadataFile, JSON.stringify(metadata, null, 4))) throw new BoxError(BoxError.FS_ERROR, `Error writing fs metadata: ${safe.error.message}`);
}
// re-applies the filesystem metadata recorded by saveFsMetadata(): recreates
// empty directories, restores executable bits and recreates symlinks.
// throws BoxError.EXTERNAL_ERROR on unreadable/unparseable metadata, BoxError.FS_ERROR otherwise.
async function restoreFsMetadata(dataLayout, metadataFile) {
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof metadataFile, 'string');

    debug(`Recreating empty directories in ${dataLayout.toString()}`);

    const metadataJson = safe.fs.readFileSync(metadataFile, 'utf8');
    if (metadataJson === null) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Error loading fsmetadata.json:' + safe.error.message);
    const metadata = safe.JSON.parse(metadataJson);
    if (metadata === null) throw new BoxError(BoxError.EXTERNAL_ERROR, 'Error parsing fsmetadata.json:' + safe.error.message);

    // guard every field with || [] (previously only symlinks was guarded) so that
    // metadata files missing a key do not crash the restore
    for (const emptyDir of (metadata.emptyDirs || [])) {
        const [ mkdirError ] = await safe(fs.promises.mkdir(dataLayout.toLocalPath(emptyDir), { recursive: true }));
        if (mkdirError) throw new BoxError(BoxError.FS_ERROR, `unable to create path: ${mkdirError.message}`);
    }

    for (const execFile of (metadata.execFiles || [])) {
        const [ chmodError ] = await safe(fs.promises.chmod(dataLayout.toLocalPath(execFile), 0o755)); // octal literal instead of parseInt('0755', 8)
        if (chmodError) throw new BoxError(BoxError.FS_ERROR, `unable to chmod: ${chmodError.message}`);
    }

    for (const symlink of (metadata.symlinks || [])) {
        if (!symlink.target) continue; // readlink failed at backup time
        // the path may not exist if we had a directory full of symlinks
        const [ mkdirError ] = await safe(fs.promises.mkdir(path.dirname(dataLayout.toLocalPath(symlink.path)), { recursive: true }));
        if (mkdirError) throw new BoxError(BoxError.FS_ERROR, `unable to symlink (mkdir): ${mkdirError.message}`);
        const [ symlinkError ] = await safe(fs.promises.symlink(symlink.target, dataLayout.toLocalPath(symlink.path), 'file'));
        if (symlinkError) throw new BoxError(BoxError.FS_ERROR, `unable to symlink: ${symlinkError.message}`);
    }
}
// downloads every entry under backupFilePath (paginated via the storage api's
// listDir) into the local dataLayout, decrypting file names and contents when
// the target is configured for encryption. each file download is retried.
async function downloadDir(backupTarget, backupFilePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    const encryptedFilenames = backupTarget.encryption?.encryptedFilenames || false;
    debug(`downloadDir: ${backupFilePath} to ${dataLayout.toString()}. encryption filenames: ${encryptedFilenames} content: ${!!backupTarget.encryption}`);

    // downloads a single listing entry into its (decrypted) local path
    async function downloadFile(entry) {
        let relativePath = path.relative(backupFilePath, entry.fullPath);
        if (encryptedFilenames) {
            const { error, result } = hush.decryptFilePath(relativePath, backupTarget.encryption);
            if (error) throw new BoxError(BoxError.CRYPTO_ERROR, 'Unable to decrypt file');
            relativePath = result;
        }
        const destFilePath = dataLayout.toLocalPath('./' + relativePath);

        // ensure the destination directory exists before streaming into it
        const [ mkdirError ] = await safe(fs.promises.mkdir(path.dirname(destFilePath), { recursive: true }));
        if (mkdirError) throw new BoxError(BoxError.FS_ERROR, mkdirError.message);

        await promiseRetry({ times: 3, interval: 20000 }, async function () {
            const [ downloadError, sourceStream ] = await safe(backupTargets.storageApi(backupTarget).download(backupTarget.config, entry.fullPath));
            if (downloadError) {
                progressCallback({ message: `Download ${entry.fullPath} to ${destFilePath} errored: ${downloadError.message}` });
                throw downloadError; // rethrow so promiseRetry attempts again
            }

            const ps = new ProgressStream({ interval: 10000 }); // display a progress every 10 seconds
            ps.on('progress', function (progress) {
                const transferred = Math.round(progress.transferred/1024/1024), speed = Math.round(progress.speed/1024/1024);
                if (!transferred && !speed) return progressCallback({ message: `Downloading ${entry.fullPath}` }); // 0M@0MBps looks wrong
                progressCallback({ message: `Downloading ${entry.fullPath}: ${transferred}M@${speed}MBps` });
            });

            // pipeline order: source -> progress -> (decrypt) -> destination file.
            // progress therefore measures the encrypted byte count when encryption is on.
            const destStream = fs.createWriteStream(destFilePath);
            const streams = [ sourceStream, ps ];
            if (backupTarget.encryption) {
                const decryptStream = new DecryptStream(backupTarget.encryption);
                streams.push(decryptStream);
            }
            streams.push(destStream);

            progressCallback({ message: `Downloading ${entry.fullPath} to ${destFilePath}` });

            const [ pipelineError ] = await safe(stream.pipeline(streams));
            if (pipelineError) {
                progressCallback({ message: `Download error ${entry.fullPath} to ${destFilePath}: ${pipelineError.message}` });
                throw pipelineError; // rethrow so promiseRetry attempts again
            }
            progressCallback({ message: `Download finished ${entry.fullPath} to ${destFilePath}` });
        });
    }

    // https://www.digitalocean.com/community/questions/rate-limiting-on-spaces?answer=40441
    const concurrency = backupTarget.limits?.downloadConcurrency || (backupTarget.provider === 's3' ? 30 : 10);

    let marker = null;
    while (true) {
        const batch = await backupTargets.storageApi(backupTarget).listDir(backupTarget.config, backupFilePath, marker === null ? 1 : 1000, marker); // try with one file first. if that works out, we continue faster
        await async.eachLimit(batch.entries, concurrency, downloadFile);
        if (!batch.marker) break;
        marker = batch.marker;
    }
}
// restores a complete rsync-format backup: first the file contents, then the
// recorded filesystem metadata (empty dirs, exec bits, symlinks)
async function download(backupTarget, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout');
    assert.strictEqual(typeof progressCallback, 'function');

    const backupFilePath = backupTargets.getBackupFilePath(backupTarget, remotePath);
    const metadataFile = `${dataLayout.localRoot()}/fsmetadata.json`;

    debug(`download: Downloading ${backupFilePath} to ${dataLayout.toString()}`);

    await downloadDir(backupTarget, backupFilePath, dataLayout, progressCallback);
    await restoreFsMetadata(dataLayout, metadataFile);
}
// saves filesystem metadata into the data layout and then syncs the layout to
// the backup target
async function upload(backupTarget, remotePath, dataLayout, progressCallback) {
    assert.strictEqual(typeof backupTarget, 'object');
    assert.strictEqual(typeof remotePath, 'string');
    assert(dataLayout instanceof DataLayout, 'dataLayout must be a DataLayout'); // consistent with sync()/download(); was a weaker typeof check
    assert.strictEqual(typeof progressCallback, 'function');

    await saveFsMetadata(dataLayout, `${dataLayout.localRoot()}/fsmetadata.json`);
    await sync(backupTarget, remotePath, dataLayout, progressCallback);
}