2021-07-14 11:07:19 -07:00
'use strict' ;
2022-03-30 10:17:20 -07:00
const BoxError = require ( './boxerror.js' ) ;
2021-07-14 11:07:19 -07:00
// Public surface of the backup cleaner. applyBackupRetention is additionally exported with a
// leading underscore - presumably so that tests can reach it (TODO confirm).
exports = module.exports = {
    run,
    _applyBackupRetention: applyBackupRetention
};
const apps = require ( './apps.js' ) ,
2024-12-10 10:06:52 +01:00
archives = require ( './archives.js' ) ,
2021-07-14 11:07:19 -07:00
assert = require ( 'assert' ) ,
2022-04-28 18:43:14 -07:00
backupFormat = require ( './backupformat.js' ) ,
2021-07-14 11:07:19 -07:00
backups = require ( './backups.js' ) ,
constants = require ( './constants.js' ) ,
debug = require ( 'debug' ) ( 'box:backupcleaner' ) ,
moment = require ( 'moment' ) ,
path = require ( 'path' ) ,
paths = require ( './paths.js' ) ,
safe = require ( 'safetydance' ) ,
2023-01-31 11:17:25 +01:00
storage = require ( './storage.js' ) ;
2021-07-14 11:07:19 -07:00
2023-07-12 10:01:53 +05:30
/**
 * Tags every backup in `allBackups` (in place) with a `keepReason` or a `discardReason`
 * according to the retention policy. A backup that ends up with neither is logged as
 * 'unprocessed'; the callers in this file remove every backup that has no `keepReason`.
 *
 * The period rules (keepDaily, keepWeekly, ...) keep the first backup they encounter in each
 * period. NOTE(review): this presumably relies on `allBackups` being sorted newest first -
 * confirm against the listing query.
 *
 * @param {Array<object>} allBackups - backup records (state, id, creationTime, preserveSecs, remotePath); mutated
 * @param {object} retention - may carry keepWithinSecs, keepDaily, keepWeekly, keepMonthly, keepYearly, keepLatest
 * @param {Array<string>} referencedBackupIds - ids that must be kept because something else refers to them
 * @returns {undefined} the result is communicated through the mutated backup objects
 */
function applyBackupRetention(allBackups, retention, referencedBackupIds) {
    assert(Array.isArray(allBackups));
    assert.strictEqual(typeof retention, 'object');
    assert(Array.isArray(referencedBackupIds));

    // a backup that is still in the creating state after this long is discarded
    const CREATING_GRACE_MS = 48 * 60 * 60 * 1000;

    const now = new Date();
    const referenced = new Set(referencedBackupIds); // constant time lookups inside the loop

    for (const backup of allBackups) {
        const ageMs = now - backup.creationTime;

        if (backup.state === backups.BACKUP_STATE_ERROR) {
            backup.discardReason = 'error';
        } else if (backup.state === backups.BACKUP_STATE_CREATING) {
            if (ageMs < CREATING_GRACE_MS) backup.keepReason = 'creating';
            else backup.discardReason = 'creating-too-long';
        } else if (referenced.has(backup.id)) { // could also be in archives
            backup.keepReason = 'referenced';
        } else if (backup.preserveSecs === -1 || ageMs < backup.preserveSecs * 1000) {
            backup.keepReason = 'preserveSecs';
        } else if (ageMs < retention.keepWithinSecs * 1000 || retention.keepWithinSecs < 0) {
            backup.keepReason = 'keepWithinSecs';
        }
    }

    // moment format strings; two backups with the same formatted value fall into the same period
    const KEEP_FORMATS = {
        keepDaily: 'Y-M-D',
        keepWeekly: 'Y-W',
        keepMonthly: 'Y-M',
        keepYearly: 'Y'
    };

    for (const [format, periodFormat] of Object.entries(KEEP_FORMATS)) {
        const n = retention[format]; // we want to keep "n" backups of format
        if (!n) continue; // rule is missing or disabled

        let lastPeriod = null, keptSoFar = 0;
        for (const backup of allBackups) {
            if (backup.discardReason) continue; // already discarded for some reason
            if (backup.keepReason && backup.keepReason !== 'referenced') continue; // kept for some other reason

            const period = moment(backup.creationTime).format(periodFormat);
            if (period === lastPeriod) continue; // already kept for this period

            lastPeriod = period;
            // a referenced backup also counts towards the rule, the reason records both
            backup.keepReason = backup.keepReason ? `${backup.keepReason}+${format}` : format;
            if (++keptSoFar === n) break;
        }
    }

    if (retention.keepLatest) {
        // first backup in list order that completed normally
        const latestNormalBackup = allBackups.find(b => b.state === backups.BACKUP_STATE_NORMAL);
        if (latestNormalBackup && !latestNormalBackup.keepReason) latestNormalBackup.keepReason = 'latest';
    }

    for (const backup of allBackups) {
        debug(`applyBackupRetention: ${backup.remotePath} keep/discard: ${backup.keepReason || backup.discardReason || 'unprocessed'}`);
    }
}
2022-04-04 14:13:27 -07:00
/**
 * Removes one backup from the storage backend and, if that worked, from the database.
 * Storage and database failures are logged via debug() and not propagated. When the
 * storage removal fails the database entry is left in place and nothing else is attempted.
 *
 * @param {object} backupConfig - backup configuration; `provider` selects the storage backend
 * @param {object} backup - backup record (id, format, remotePath)
 * @param {function} progressCallback - called with `{ message }`; also handed to removeDir
 * @returns {Promise<undefined>}
 */
async function removeBackup(backupConfig, backup, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof backup, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const backupFilePath = backupFormat.api(backup.format).getBackupFilePath(backupConfig, backup.remotePath);
    const storageApi = storage.api(backupConfig.provider);

    // a tgz backup is removed with remove(), every other format with removeDir()
    let removeError;
    if (backup.format === 'tgz') {
        progressCallback({ message: `${backup.remotePath}: Removing ${backupFilePath}` });
        [removeError] = await safe(storageApi.remove(backupConfig, backupFilePath));
    } else {
        progressCallback({ message: `${backup.remotePath}: Removing directory ${backupFilePath}` });
        [removeError] = await safe(storageApi.removeDir(backupConfig, backupFilePath, progressCallback));
    }

    if (removeError) {
        debug(`removeBackup: error removing backup ${removeError.message}`);
        return; // keep the database entry since the remote data was not removed
    }

    // prune empty directory if possible
    const parentDir = path.dirname(backupFilePath);
    const [pruneError] = await safe(storageApi.remove(backupConfig, parentDir));
    if (pruneError) debug(`removeBackup: unable to prune backup directory ${parentDir}: ${pruneError.message}`);

    const [delError] = await safe(backups.del(backup.id));
    if (delError) debug(`removeBackup: error removing ${backup.id} from database. %o`, delError);
    else debug(`removeBackup: removed ${backup.remotePath}`);
}
2023-07-12 10:01:53 +05:30
/**
 * Applies the retention policy to app backups, one app at a time, and removes every backup
 * that was not tagged with a keepReason. Only the first 1000 app backups returned by
 * backups.getByTypePaged() are considered per run.
 *
 * @param {object} backupConfig - backup configuration handed to removeBackup
 * @param {object} retention - retention policy; merged over a per-app `keepLatest` default
 * @param {Array<string>} referencedBackupIds - backup ids that must not be removed
 * @param {function} progressCallback - awaited with `{ message }` before each removal
 * @returns {Promise<Array<string>>} remote paths of the backups that were picked for removal
 */
async function cleanupAppBackups(backupConfig, retention, referencedBackupIds, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof retention, 'object');
    assert(Array.isArray(referencedBackupIds));
    assert.strictEqual(typeof progressCallback, 'function');

    const allApps = await apps.list();
    const installedAppIds = new Set(allApps.map(a => a.id));

    const appBackups = await backups.getByTypePaged(backups.BACKUP_TYPE_APP, 1, 1000);

    // collate the backups by app id. note that the app could already have been uninstalled
    const appBackupsById = new Map();
    for (const appBackup of appBackups) {
        if (!appBackupsById.has(appBackup.identifier)) appBackupsById.set(appBackup.identifier, []);
        appBackupsById.get(appBackup.identifier).push(appBackup);
    }

    // apply backup policy per app. keep latest backup only for existing apps
    const appBackupsToRemove = [];
    for (const [appId, backupsOfApp] of appBackupsById) {
        const appRetention = Object.assign({ keepLatest: installedAppIds.has(appId) }, retention);
        debug(`cleanupAppBackups: applying retention for appId ${appId} retention: ${JSON.stringify(appRetention)}`);
        applyBackupRetention(backupsOfApp, appRetention, referencedBackupIds);
        for (const appBackup of backupsOfApp) {
            if (!appBackup.keepReason) appBackupsToRemove.push(appBackup);
        }
    }

    const removedAppBackupPaths = [];
    for (const appBackup of appBackupsToRemove) {
        await progressCallback({ message: `Removing app backup (${appBackup.identifier}): ${appBackup.id}` });
        removedAppBackupPaths.push(appBackup.remotePath);
        await removeBackup(backupConfig, appBackup, progressCallback); // never errors
    }

    debug('cleanupAppBackups: done');

    return removedAppBackupPaths;
}
2023-07-12 10:01:53 +05:30
/**
 * Applies the retention policy (with keepLatest defaulting to true) to mail backups and
 * removes every backup that was not tagged with a keepReason. Only the first 1000 mail
 * backups returned by backups.getByTypePaged() are considered per run.
 *
 * @param {object} backupConfig - backup configuration handed to removeBackup
 * @param {object} retention - retention policy
 * @param {Array<string>} referencedBackupIds - backup ids that must not be removed
 * @param {function} progressCallback - awaited with `{ message }` before each removal
 * @returns {Promise<Array<string>>} remote paths of the backups that were picked for removal
 */
async function cleanupMailBackups(backupConfig, retention, referencedBackupIds, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof retention, 'object');
    assert(Array.isArray(referencedBackupIds));
    assert.strictEqual(typeof progressCallback, 'function');

    const mailBackups = await backups.getByTypePaged(backups.BACKUP_TYPE_MAIL, 1, 1000);

    applyBackupRetention(mailBackups, Object.assign({ keepLatest: true }, retention), referencedBackupIds);

    const removedMailBackupPaths = [];
    for (const mailBackup of mailBackups.filter(b => !b.keepReason)) {
        await progressCallback({ message: `Removing mail backup ${mailBackup.remotePath}` });
        removedMailBackupPaths.push(mailBackup.remotePath);
        await removeBackup(backupConfig, mailBackup, progressCallback); // never errors
    }

    debug('cleanupMailBackups: done');

    return removedMailBackupPaths;
}
2023-07-12 10:01:53 +05:30
/**
 * Applies the retention policy (with keepLatest defaulting to true) to box backups. Backups
 * without a keepReason are removed; for the ones that are kept, their `dependsOn` ids are
 * collected so that callers can protect those backups from removal. Only the first 1000 box
 * backups returned by backups.getByTypePaged() are considered per run.
 *
 * @param {object} backupConfig - backup configuration handed to removeBackup
 * @param {object} retention - retention policy
 * @param {function} progressCallback - awaited with `{ message }` before each removal
 * @returns {Promise<{removedBoxBackupPaths: Array<string>, referencedBackupIds: Array<string>}>}
 */
async function cleanupBoxBackups(backupConfig, retention, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof retention, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const boxBackups = await backups.getByTypePaged(backups.BACKUP_TYPE_BOX, 1, 1000);

    applyBackupRetention(boxBackups, Object.assign({ keepLatest: true }, retention), [] /* references */);

    let referencedBackupIds = [];
    const removedBoxBackupPaths = [];

    for (const boxBackup of boxBackups) {
        if (boxBackup.keepReason) {
            // whatever a kept box backup depends on has to survive as well
            referencedBackupIds = referencedBackupIds.concat(boxBackup.dependsOn);
            continue;
        }

        await progressCallback({ message: `Removing box backup ${boxBackup.remotePath}` });

        removedBoxBackupPaths.push(boxBackup.remotePath);
        await removeBackup(backupConfig, boxBackup, progressCallback);
    }

    debug('cleanupBoxBackups: done');

    return { removedBoxBackupPaths, referencedBackupIds };
}
2024-08-15 15:51:38 +02:00
/**
 * Cleans up the database by checking whether each backup still exists in the remote. Entries
 * can go missing remotely if, for example, the user has set some bucket policy.
 *
 * Only backups in the normal state are checked. When the existence check itself fails the
 * entry is left alone. Does nothing when constants.TEST is set.
 *
 * @param {object} backupConfig - backup configuration; `provider` selects the storage backend
 * @param {function} progressCallback - awaited with `{ message }` before each database removal
 * @returns {Promise<Array<string>>} remote paths of the backups that were found missing
 */
async function cleanupMissingBackups(backupConfig, progressCallback) {
    assert.strictEqual(typeof backupConfig, 'object');
    assert.strictEqual(typeof progressCallback, 'function');

    const perPage = 1000;
    const missingBackupPaths = [];

    if (constants.TEST) return missingBackupPaths;

    const storageApi = storage.api(backupConfig.provider);

    // pass 1: only collect. deleting rows while paging through the listing shifts the
    // following pages, which makes the scan skip as many entries as were just deleted
    const missingBackups = [];
    let page = 1, result = [];
    do {
        result = await backups.list(page, perPage);

        for (const backup of result) {
            if (backup.state !== backups.BACKUP_STATE_NORMAL) continue; // note: errored and incomplete backups are cleaned up by the backup retention logic

            let backupFilePath = backupFormat.api(backup.format).getBackupFilePath(backupConfig, backup.remotePath);
            if (backup.format === 'rsync') backupFilePath = backupFilePath + '/'; // add trailing slash to indicate directory

            const [existsError, exists] = await safe(storageApi.exists(backupConfig, backupFilePath));
            if (existsError || exists) continue; // when in doubt, keep the entry

            missingBackups.push(backup);
        }

        ++page;
    } while (result.length === perPage);

    // pass 2: drop the stale entries from the database
    for (const backup of missingBackups) {
        await progressCallback({ message: `Removing missing backup ${backup.remotePath}` });

        const [delError] = await safe(backups.del(backup.id));
        if (delError) debug(`cleanupMissingBackups: error removing ${backup.id} from database. %o`, delError);

        missingBackupPaths.push(backup.remotePath); // reported even if the database removal failed
    }

    debug('cleanupMissingBackups: done');

    return missingBackupPaths;
}
/**
 * Removes the snapshots of apps that have been uninstalled. For every app id in the snapshot
 * info file that no longer resolves to an app, this removes the remote snapshot, the local
 * sync cache files and the snapshot info entry. The 'box' and 'mail' entries are never touched.
 * All removal steps are best effort.
 *
 * @param {object} backupConfig - backup configuration; `provider` selects the storage backend
 * @returns {Promise<undefined>}
 */
async function cleanupSnapshots(backupConfig) {
    assert.strictEqual(typeof backupConfig, 'object');

    const contents = safe.fs.readFileSync(paths.SNAPSHOT_INFO_FILE, 'utf8');
    const info = safe.JSON.parse(contents);
    if (!info) return; // nothing usable was read from the info file

    const progressCallback = (progress) => { debug(`cleanupSnapshots: ${progress.message}`); };
    const storageApi = storage.api(backupConfig.provider);

    for (const appId of Object.keys(info)) {
        if (appId === 'box' || appId === 'mail') continue;

        const app = await apps.get(appId);
        if (app) continue; // app is still installed

        const { format } = info[appId];
        const snapshotPath = backupFormat.api(format).getBackupFilePath(backupConfig, `snapshot/app_${appId}`);

        // a tgz snapshot is removed with remove(), every other format with removeDir()
        if (format === 'tgz') {
            await safe(storageApi.remove(backupConfig, snapshotPath), { debug });
        } else {
            await safe(storageApi.removeDir(backupConfig, snapshotPath, progressCallback), { debug });
        }

        safe.fs.unlinkSync(path.join(paths.BACKUP_INFO_DIR, `${appId}.sync.cache`));
        safe.fs.unlinkSync(path.join(paths.BACKUP_INFO_DIR, `${appId}.sync.cache.new`));

        await safe(backups.setSnapshotInfo(appId, null /* info */), { debug });
        debug(`cleanupSnapshots: cleaned up snapshot of app ${appId}`);
    }

    debug('cleanupSnapshots: done');
}
2021-08-20 09:19:44 -07:00
/**
 * Entry point of the backup cleaner. Cleans box, mail and app backups according to the
 * retention policy, then drops database entries whose remote data is gone and finally removes
 * the snapshots of uninstalled apps.
 *
 * @param {function} progressCallback - awaited with `{ percent, message }` before each stage
 * @returns {Promise<object>} `{}` when the policy keeps all backups (negative keepWithinSecs),
 *     otherwise `{ removedBoxBackupPaths, removedMailBackupPaths, removedAppBackupPaths, missingBackupPaths }`
 * @throws {BoxError} MOUNT_ERROR when the backup endpoint is not in the 'active' state
 */
async function run(progressCallback) {
    assert.strictEqual(typeof progressCallback, 'function');

    const backupConfig = await backups.getConfig();
    const { retention } = await backups.getPolicy();
    debug(`run: retention is ${JSON.stringify(retention)}`);

    const status = await backups.ensureMounted();
    debug(`run: mount point status is ${JSON.stringify(status)}`);
    if (status.state !== 'active') throw new BoxError(BoxError.MOUNT_ERROR, `Backup endpoint is not mounted: ${status.message}`);

    if (retention.keepWithinSecs < 0) {
        debug('run: keeping all backups');
        return {};
    }

    await progressCallback({ percent: 10, message: 'Cleaning box backups' });
    const { removedBoxBackupPaths, referencedBackupIds } = await cleanupBoxBackups(backupConfig, retention, progressCallback); // references is app or mail backup ids

    await progressCallback({ percent: 20, message: 'Cleaning mail backups' });
    const removedMailBackupPaths = await cleanupMailBackups(backupConfig, retention, referencedBackupIds, progressCallback);

    await progressCallback({ percent: 40, message: 'Cleaning app backups' });
    // archived backups are protected in the same way as the ones referenced by box backups
    const archivedBackupIds = await archives.listBackupIds();
    const protectedAppBackupIds = referencedBackupIds.concat(archivedBackupIds);
    const removedAppBackupPaths = await cleanupAppBackups(backupConfig, retention, protectedAppBackupIds, progressCallback);

    await progressCallback({ percent: 70, message: 'Checking storage backend and removing stale entries in database' });
    const missingBackupPaths = await cleanupMissingBackups(backupConfig, progressCallback);

    await progressCallback({ percent: 90, message: 'Cleaning snapshots' });
    await cleanupSnapshots(backupConfig);

    return { removedBoxBackupPaths, removedMailBackupPaths, removedAppBackupPaths, missingBackupPaths };
}