diff --git a/src/backups.js b/src/backups.js index 1e4a3cc27..c8f1a72ab 100644 --- a/src/backups.js +++ b/src/backups.js @@ -41,6 +41,7 @@ var addons = require('./addons.js'), DatabaseError = require('./databaseerror.js'), debug = require('debug')('box:backups'), eventlog = require('./eventlog.js'), + fs = require('fs'), locker = require('./locker.js'), mailer = require('./mailer.js'), mkdirp = require('mkdirp'), @@ -52,6 +53,7 @@ var addons = require('./addons.js'), safe = require('safetydance'), shell = require('./shell.js'), settings = require('./settings.js'), + syncer = require('./syncer.js'), tar = require('tar-fs'), util = require('util'), zlib = require('zlib'); @@ -158,13 +160,16 @@ function getRestoreConfig(backupId, callback) { }); } -function getBackupFilePath(backupConfig, backupId) { +function getBackupFilePath(backupConfig, backupId, subpath) { assert.strictEqual(typeof backupConfig, 'object'); assert.strictEqual(typeof backupId, 'string'); - const FILE_TYPE = backupConfig.key ? '.tar.gz.enc' : '.tar.gz'; - - return path.join(backupConfig.prefix || backupConfig.backupFolder, backupId+FILE_TYPE); + if (backupConfig.format === 'tgz') { + const fileType = backupConfig.key ? 
'.tar.gz.enc' : '.tar.gz'; + return path.join(backupConfig.prefix || backupConfig.backupFolder, backupId+fileType); + } else { + return path.join(backupConfig.prefix || backupConfig.backupFolder, backupId, subpath || ''); + } } function createTarPackStream(sourceDir, key) { @@ -210,6 +215,31 @@ function createTarPackStream(sourceDir, key) { } } +function sync(backupConfig, backupId, dataDir, callback) { + syncer.sync(dataDir, function processTask(task, iteratorCallback) { + debug('syncer task: %j', task); + if (task.operation === 'add') { + var stream = fs.createReadStream(path.join(dataDir, task.path)); + stream.on('error', function () { return iteratorCallback(); }); // ignore error if file disappears + api(backupConfig.provider).upload(backupConfig, getBackupFilePath(backupConfig, backupId, task.path), stream, iteratorCallback); + } else if (task.operation === 'remove') { + api(backupConfig.provider).remove(backupConfig, getBackupFilePath(backupConfig, backupId, task.path), iteratorCallback); + } + }, callback); +} + +function saveEmptyDirs(appDataDir, callback) { + assert.strictEqual(typeof appDataDir, 'string'); + assert.strictEqual(typeof callback, 'function'); + + var emptyDirs = safe.child_process.execSync('find . 
-type d -empty', { cwd: `${appDataDir}` }); + + if (emptyDirs === null) return callback(safe.error); + + if (!safe.fs.writeFileSync(`${appDataDir}/emptydirs.txt`, emptyDirs)) return callback(safe.error); + callback(); +} + // this function is called via backuptask (since it needs root to traverse app's directory) function upload(backupId, dataDir, callback) { assert.strictEqual(typeof backupId, 'string'); @@ -222,9 +252,16 @@ function upload(backupId, dataDir, callback) { settings.getBackupConfig(function (error, backupConfig) { if (error) return callback(new BackupsError(BackupsError.INTERNAL_ERROR, error)); - var tarStream = createTarPackStream(dataDir, backupConfig.key || null); - tarStream.on('error', callback); // already returns BackupsError - api(backupConfig.provider).upload(backupConfig, getBackupFilePath(backupConfig, backupId), tarStream, callback); + if (backupConfig.format === 'tgz') { + var tarStream = createTarPackStream(dataDir, backupConfig.key || null); + tarStream.on('error', callback); // already returns BackupsError + api(backupConfig.provider).upload(backupConfig, getBackupFilePath(backupConfig, backupId), tarStream, callback); + } else { + async.series([ + saveEmptyDirs.bind(null, dataDir), + sync.bind(null, backupConfig, backupId, dataDir) + ], callback); + } }); } @@ -410,17 +447,20 @@ function rotateBoxBackup(backupConfig, timestamp, appBackupIds, callback) { var snapshotTime = snapshotInfo.timestamp.replace(/[T.]/g, '-').replace(/[:Z]/g,''); var backupId = util.format('%s/box_%s_v%s', timestamp, snapshotTime, config.version()); + debug('rotateBoxBackup: rotating to id:%s', backupId); + backupdb.add({ id: backupId, version: config.version(), type: backupdb.BACKUP_TYPE_BOX, dependsOn: appBackupIds, restoreConfig: null }, function (error) { if (error) return callback(new BackupsError(BackupsError.INTERNAL_ERROR, error)); api(backupConfig.provider).copy(backupConfig, getBackupFilePath(backupConfig, 'snapshot/box'), 
getBackupFilePath(backupConfig, backupId), function (copyBackupError) { const state = copyBackupError ? backupdb.BACKUP_STATE_ERROR : backupdb.BACKUP_STATE_NORMAL; - debug('rotateBoxBackup: successful id:%s', backupId); backupdb.update(backupId, { state: state }, function (error) { if (copyBackupError) return callback(new BackupsError(BackupsError.EXTERNAL_ERROR, copyBackupError.message)); if (error) return callback(new BackupsError(BackupsError.INTERNAL_ERROR, error)); + debug('rotateBoxBackup: successful id:%s', backupId); + // FIXME this is only needed for caas, hopefully we can remove that in the future api(backupConfig.provider).backupDone(backupId, appBackupIds, function (error) { if (error) return callback(error); @@ -503,6 +543,8 @@ function rotateAppBackup(backupConfig, app, timestamp, callback) { var manifest = restoreConfig.manifest; var backupId = util.format('%s/app_%s_%s_v%s', timestamp, app.id, snapshotTime, manifest.version); + debugApp(app, 'rotateAppBackup: rotating to id:%s', backupId); + backupdb.add({ id: backupId, version: manifest.version, type: backupdb.BACKUP_TYPE_APP, dependsOn: [ ], restoreConfig: restoreConfig }, function (error) { if (error) return callback(new BackupsError(BackupsError.INTERNAL_ERROR, error)); diff --git a/src/paths.js b/src/paths.js index 823cb79a0..21f2b8eee 100644 --- a/src/paths.js +++ b/src/paths.js @@ -21,6 +21,7 @@ exports = module.exports = { NGINX_CONFIG_DIR: path.join(config.baseDir(), 'platformdata/nginx'), NGINX_APPCONFIG_DIR: path.join(config.baseDir(), 'platformdata/nginx/applications'), NGINX_CERT_DIR: path.join(config.baseDir(), 'platformdata/nginx/cert'), + SNAPSHOT_DIR: path.join(config.baseDir(), 'platformdata/snapshots'), SNAPSHOT_INFO_FILE: path.join(config.baseDir(), 'platformdata/snapshots/info.json'), // this is not part of appdata because an icon may be set before install diff --git a/src/storage/caas.js b/src/storage/caas.js index 989779093..7bacfd0b7 100644 --- a/src/storage/caas.js +++ 
b/src/storage/caas.js @@ -58,8 +58,6 @@ function upload(apiConfig, backupFilePath, sourceStream, callback) { assert.strictEqual(typeof sourceStream, 'object'); assert.strictEqual(typeof callback, 'function'); - debug('upload: %s', backupFilePath); - getBackupCredentials(apiConfig, function (error, credentials) { if (error) return callback(error); var params = { @@ -174,21 +172,48 @@ function copy(apiConfig, oldFilePath, newFilePath, callback) { getBackupCredentials(apiConfig, function (error, credentials) { if (error) return callback(error); - var params = { + var s3 = new AWS.S3(credentials); + var listParams = { Bucket: apiConfig.bucket, - Key: newFilePath, - CopySource: path.join(apiConfig.bucket, oldFilePath) + Prefix: oldFilePath }; - var s3 = new AWS.S3(credentials); - s3.copyObject(params, function (error) { - if (error && error.code === 'NoSuchKey') return callback(new BackupsError(BackupsError.NOT_FOUND)); - if (error) { - debug('copy: s3 copy error.', error); - return callback(new BackupsError(BackupsError.EXTERNAL_ERROR, error)); - } + async.forever(function listAndDelete(foreverCallback) { + s3.listObjectsV2(listParams, function (error, listData) { + if (error) { + debug('remove: Failed to list %s. 
Not fatal.', error); + return foreverCallback(new BackupsError(BackupsError.EXTERNAL_ERROR, error.message)); + } - callback(null); + async.eachLimit(listData.Contents, 10, function copyFile(content, iteratorCallback) { + var relativePath = path.relative(oldFilePath, content.Key); + + var copyParams = { + Bucket: apiConfig.bucket, + Key: path.join(newFilePath, relativePath), + CopySource: path.join(apiConfig.bucket, content.Key) + }; + + s3.copyObject(copyParams, function (error) { + if (error && error.code === 'NoSuchKey') return iteratorCallback(new BackupsError(BackupsError.NOT_FOUND, 'Old backup not found')); + if (error) { + debug('copy: s3 copy error.', error); + return iteratorCallback(new BackupsError(BackupsError.EXTERNAL_ERROR, error.message)); + } + + iteratorCallback(); + }); + }, function doneCopying(error) { + if (error) return foreverCallback(error); + + if (listData.IsTruncated) return foreverCallback(); + + foreverCallback(new Error('Done')); + }); + }); + }, function (error) { + if (error.message === 'Done') return callback(); + callback(error); }); }); } diff --git a/src/storage/filesystem.js b/src/storage/filesystem.js index fe6c78c94..184e1ceba 100644 --- a/src/storage/filesystem.js +++ b/src/storage/filesystem.js @@ -34,11 +34,11 @@ function upload(apiConfig, backupFilePath, sourceStream, callback) { assert.strictEqual(typeof sourceStream, 'object'); assert.strictEqual(typeof callback, 'function'); - debug('upload: %s', backupFilePath); - mkdirp(path.dirname(backupFilePath), function (error) { if (error) return callback(new BackupsError(BackupsError.EXTERNAL_ERROR, error.message)); + safe.fs.unlinkSync(backupFilePath); // remove any hardlink + var fileStream = fs.createWriteStream(backupFilePath); fileStream.on('error', function (error) { @@ -47,11 +47,9 @@ function upload(apiConfig, backupFilePath, sourceStream, callback) { }); fileStream.on('close', function () { - debug('[%s] upload: changing ownership.', backupFilePath); - if 
(!safe.child_process.execSync('chown -R ' + BACKUP_USER + ':' + BACKUP_USER + ' ' + path.dirname(backupFilePath))) return callback(new BackupsError(BackupsError.INTERNAL_ERROR, safe.error.message)); - debug('[%s] upload: done.', backupFilePath); + debug('upload %s: done.', backupFilePath); callback(null); }); diff --git a/src/storage/s3.js b/src/storage/s3.js index b445006f4..56b8226e0 100644 --- a/src/storage/s3.js +++ b/src/storage/s3.js @@ -66,8 +66,6 @@ function upload(apiConfig, backupFilePath, sourceStream, callback) { assert.strictEqual(typeof sourceStream, 'object'); assert.strictEqual(typeof callback, 'function'); - debug('upload: %s', backupFilePath); - getBackupCredentials(apiConfig, function (error, credentials) { if (error) return callback(error); @@ -183,21 +181,48 @@ function copy(apiConfig, oldFilePath, newFilePath, callback) { getBackupCredentials(apiConfig, function (error, credentials) { if (error) return callback(error); - var params = { + var s3 = new AWS.S3(credentials); + var listParams = { Bucket: apiConfig.bucket, - Key: newFilePath, - CopySource: path.join(apiConfig.bucket, oldFilePath) + Prefix: oldFilePath }; - var s3 = new AWS.S3(credentials); - s3.copyObject(params, function (error) { - if (error && error.code === 'NoSuchKey') return callback(new BackupsError(BackupsError.NOT_FOUND, 'Old backup not found')); - if (error) { - debug('copy: s3 copy error.', error); - return callback(new BackupsError(BackupsError.EXTERNAL_ERROR, error.message)); - } + async.forever(function listAndDelete(foreverCallback) { + s3.listObjectsV2(listParams, function (error, listData) { + if (error) { + debug('remove: Failed to list %s. 
Not fatal.', error); + return foreverCallback(new BackupsError(BackupsError.EXTERNAL_ERROR, error.message)); + } - callback(null); + async.eachLimit(listData.Contents, 10, function copyFile(content, iteratorCallback) { + var relativePath = path.relative(oldFilePath, content.Key); + + var copyParams = { + Bucket: apiConfig.bucket, + Key: path.join(newFilePath, relativePath), + CopySource: path.join(apiConfig.bucket, content.Key) + }; + + s3.copyObject(copyParams, function (error) { + if (error && error.code === 'NoSuchKey') return iteratorCallback(new BackupsError(BackupsError.NOT_FOUND, 'Old backup not found')); + if (error) { + debug('copy: s3 copy error.', error); + return iteratorCallback(new BackupsError(BackupsError.EXTERNAL_ERROR, error.message)); + } + + iteratorCallback(); + }); + }, function doneCopying(error) { + if (error) return foreverCallback(error); + + if (listData.IsTruncated) return foreverCallback(); + + foreverCallback(new Error('Done')); + }); + }); + }, function (error) { + if (error.message === 'Done') return callback(); + callback(error); }); }); } diff --git a/src/syncer.js b/src/syncer.js new file mode 100644 index 000000000..3a978b362 --- /dev/null +++ b/src/syncer.js @@ -0,0 +1,86 @@ +'use strict'; + +var assert = require('assert'), + fs = require('fs'), + path = require('path'), + paths = require('./paths.js'), + safe = require('safetydance'); + +exports = module.exports = { + sync: sync +}; + +function readCache(cacheFile) { + assert.strictEqual(typeof cacheFile, 'string'); + + var cache = safe.fs.readFileSync(cacheFile, 'utf8'); + if (!cache) return [ ]; + var result = cache.split('\n').map(JSON.parse); + return result; +} + +function readTree(dir) { + assert.strictEqual(typeof dir, 'string'); + + var list = safe.fs.readdirSync(dir).sort(); + if (!list) return [ ]; + + // TODO: handle lstat errors + return list.map(function (e) { return { stat: fs.lstatSync(path.join(dir, e)), name: e }; }); +} + +// TODO: concurrency +// TODO: if dir 
// became a file, remove the dir first
// TODO: write to index can simply append to a new cache file

/**
 * Feeds tasks to taskProcessor one at a time, in order.
 *
 * @param {Array<{operation: string, path: string}>} tasks - tasks to run
 * @param {function(object, function(?Error))} taskProcessor - invoked once per task
 * @param {function(?Error)} callback - called with the first task error, or null when all tasks finished
 */
function runTasksInSeries(tasks, taskProcessor, callback) {
    var nextIndex = 0;

    function runNext(error) {
        if (error) return callback(error);
        if (nextIndex === tasks.length) return callback(null);

        var task = tasks[nextIndex++];
        var reported = false;

        taskProcessor(task, function (taskError) {
            if (reported) return; // a processor may report twice (e.g. stream error and upload result); first one wins
            reported = true;

            // setImmediate keeps the stack flat even when the processor completes synchronously
            setImmediate(runNext, taskError || null);
        });
    }

    runNext(null);
}

/**
 * Compares the files below dir against the cache written by the previous run
 * (<SNAPSHOT_DIR>/<basename of dir>.cache, one JSON object with path and mtime per line)
 * and hands the difference to taskProcessor:
 *   { operation: 'remove', path } for every cached path that is no longer present
 *   { operation: 'add', path }    for every file that is new or whose mtime changed
 *
 * Symbolic links are skipped and directories are only descended into; they never
 * produce tasks themselves. Removals are queued before additions so that a path
 * which switched between file and directory is cleared before it is written again.
 *
 * Tasks run one at a time. The new cache is written only after every task has
 * succeeded, so a failed run gets retried instead of being recorded as synced.
 *
 * @param {string} dir - directory to scan
 * @param {function(object, function(?Error))} taskProcessor - performs a single add/remove task
 * @param {function(?Error)} callback - called once, after all tasks finished or with the first error
 */
function sync(dir, taskProcessor, callback) {
    assert.strictEqual(typeof dir, 'string');
    assert.strictEqual(typeof taskProcessor, 'function');
    assert.strictEqual(typeof callback, 'function');

    var cacheFile = path.join(paths.SNAPSHOT_DIR, path.basename(dir) + '.cache');

    var newCache = [ ], addTasks = [ ], removeTasks = [ ];

    try {
        var cache = readCache(cacheFile);

        // path -> mtime of the previous run. Prototype-less so that no file name can collide with Object.prototype
        var cachedMtimes = Object.create(null);
        cache.forEach(function (entry) { cachedMtimes[entry.path] = entry.mtime; });

        var present = Object.create(null);

        var traverse = function (relpath) {
            readTree(path.join(dir, relpath)).forEach(function (entry) {
                var entryPath = path.join(relpath, entry.name);

                if (entry.stat.isSymbolicLink()) return;
                if (entry.stat.isDirectory()) return traverse(entryPath);

                var mtime = entry.stat.mtime.getTime();

                newCache.push({ path: entryPath, mtime: mtime });
                present[entryPath] = true;

                // an uncached path yields undefined here and is therefore added as well
                if (cachedMtimes[entryPath] !== mtime) addTasks.push({ operation: 'add', path: entryPath });
            });
        };

        traverse('');

        cache.forEach(function (entry) {
            if (!present[entry.path]) removeTasks.push({ operation: 'remove', path: entry.path });
        });
    } catch (scanError) {
        return callback(scanError);
    }

    runTasksInSeries(removeTasks.concat(addTasks), taskProcessor, function (error) {
        if (error) return callback(error);

        var newCacheContents = newCache.map(function (ce) { return JSON.stringify({ path: ce.path, mtime: ce.mtime }); }).join('\n');

        try {
            fs.writeFileSync(cacheFile, newCacheContents, 'utf8');
        } catch (writeError) {
            return callback(writeError);
        }

        callback();
    });
}