storage: rework the listDir API to be a generator-like function

This commit is contained in:
Girish Ramakrishnan
2025-02-12 18:46:54 +01:00
parent da0dcf65b3
commit 9888aa8c08
9 changed files with 143 additions and 180 deletions
+32 -43
View File
@@ -28,8 +28,7 @@ const assert = require('assert'),
constants = require('../constants.js'),
debug = require('debug')('box:storage/gcs'),
path = require('path'),
safe = require('safetydance'),
util = require('util');
safe = require('safetydance');
let GCS = require('@google-cloud/storage').Storage;
@@ -119,33 +118,23 @@ async function download(apiConfig, backupFilePath) {
return file.createReadStream();
}
/**
 * Lists a single page of objects under a prefix in the bucket selected by apiConfig.
 *
 * The function is meant to be called in a loop: pass null as the marker for the
 * first page and then feed the marker of each result into the next call until
 * the returned marker is null.
 *
 * @param {object} apiConfig - storage configuration, handed to getBucket()
 * @param {string} backupFilePath - object name prefix to list
 * @param {number} batchSize - maximum number of entries per page; a non-positive
 *     value leaves the page size up to the storage service
 * @param {object|null} marker - null for the first page, otherwise the marker
 *     returned by the previous call (must not be undefined)
 * @returns {Promise<{entries: Array<{fullPath: string}>, marker: (object|null)}>}
 *     entries may be empty (nothing matched); marker is null when there are no more pages
 * @throws {BoxError} EXTERNAL_ERROR when the listing request fails
 */
async function listDir(apiConfig, backupFilePath, batchSize, marker) {
    assert.strictEqual(typeof apiConfig, 'object');
    assert.strictEqual(typeof backupFilePath, 'string');
    assert.strictEqual(typeof batchSize, 'number');
    assert(typeof marker !== 'undefined'); // callers must pass null explicitly for the first page

    const bucket = getBucket(apiConfig);

    // a marker is the follow-up query object returned by a previous getFiles() call, so it is reused as is
    let query = marker;
    if (!query) {
        query = { prefix: backupFilePath, autoPaginate: false };
        // only send a page size that is valid; -1 historically meant "no explicit limit"
        if (batchSize > 0) query.maxResults = batchSize;
    }

    const [error, result] = await safe(bucket.getFiles(query));
    if (error) throw new BoxError(BoxError.EXTERNAL_ERROR, `Failed to get files: ${error.message}`);

    const [files, nextQuery] = result;
    if (files.length === 0) return { entries: [], marker: null }; // no more

    const entries = files.map((file) => ({ fullPath: file.name }));
    return { entries, marker: nextQuery || null };
}
async function copy(apiConfig, oldFilePath, newFilePath, progressCallback) {
@@ -171,17 +160,16 @@ async function copy(apiConfig, oldFilePath, newFilePath, progressCallback) {
const concurrency = apiConfig.limits?.copyConcurrency || 10;
let total = 0;
const listDirAsync = util.promisify(listDir);
const [copyError] = await safe(listDirAsync(apiConfig, oldFilePath, batchSize, function (entries, done) {
total += entries.length;
progressCallback({ message: `Copying ${entries.length} files from ${entries[0].fullPath} to ${entries[entries.length-1].fullPath}. total: ${total}` });
async.eachLimit(entries, concurrency, copyFile, done);
}));
progressCallback({ message: `Copied ${total} files with error: ${copyError}` });
let marker = null;
while (true) {
const batch = await listDir(apiConfig, oldFilePath, batchSize, marker);
total += batch.entries.length;
progressCallback({ message: `Copying ${batch.entries.length} files from ${batch.entries[0].fullPath} to ${batch.entries[batch.entries.length-1].fullPath}. total: ${total}` });
await async.eachLimit(batch.entries, concurrency, copyFile);
if (!batch.marker) break;
marker = batch.marker;
}
progressCallback({ message: `Copied ${total} files` });
}
async function remove(apiConfig, filename) {
@@ -200,15 +188,16 @@ async function removeDir(apiConfig, pathPrefix, progressCallback) {
const batchSize = 1000, concurrency = apiConfig.limits?.deleteConcurrency || 10; // https://googleapis.dev/nodejs/storage/latest/Bucket.html#deleteFiles
let total = 0;
const listDirAsync = util.promisify(listDir);
await listDirAsync(apiConfig, pathPrefix, batchSize, function (entries, done) {
let marker = null;
while (true) {
const batch = await listDir(apiConfig, pathPrefix, batchSize, marker);
const entries = batch.entries;
total += entries.length;
progressCallback({ message: `Removing ${entries.length} files from ${entries[0].fullPath} to ${entries[entries.length-1].fullPath}. total: ${total}` });
async.eachLimit(entries, concurrency, async (entry) => await remove(apiConfig, entry.fullPath), done);
});
await async.eachLimit(entries, concurrency, async (entry) => await remove(apiConfig, entry.fullPath));
if (!batch.marker) break;
marker = batch.marker;
}
progressCallback({ progress: `Deleted ${total} files` });
}