s3: make delete objects work again again

This commit is contained in:
Girish Ramakrishnan
2025-07-15 00:31:07 +02:00
parent 76f365f7e8
commit bb257f2bf2

View File

@@ -29,6 +29,7 @@ const assert = require('assert'),
{ ConfiguredRetryStrategy } = require('@smithy/util-retry'),
constants = require('../constants.js'),
consumers = require('node:stream/consumers'),
crypto = require('crypto'),
debug = require('debug')('box:storage/s3'),
http = require('http'),
https = require('https'),
@@ -50,6 +51,36 @@ function formatError(error) {
const RETRY_STRATEGY = new ConfiguredRetryStrategy(10 /* max attempts */, (/* attempt */) => 20000 /* constant backoff */);
// AWS decided to use CRC32 for checksums. The Client SDK has then been changed to set this for requests as default.
// requestChecksumCalculation: "WHEN_REQUIRED", responseChecksumValidation: "WHEN_REQUIRED", "checksumAlgorithm": "md5" all don't work
// see also: https://github.com/aws/aws-sdk-js-v3/issues/6810 https://github.com/aws/aws-sdk-js-v3/issues/6819 https://github.com/aws/aws-sdk-js-v3/issues/6761
// this implements https://github.com/aws/aws-sdk-js-v3/blob/main/supplemental-docs/MD5_FALLBACK.md
const md5Middleware = (next, context) => async (args) => {
const isDeleteObjects = context.commandName === 'DeleteObjectsCommand';
if (!isDeleteObjects) return next(args);
const headers = args.request.headers;
// Remove any checksum headers added by default middleware. This ensures our Content-MD5 is the primary integrity check
for (const header of Object.keys(headers)) {
const lowerHeader = header.toLowerCase();
if (lowerHeader.startsWith('x-amz-checksum-') || lowerHeader.startsWith('x-amz-sdk-checksum-')) {
delete headers[header];
}
}
if (args.request.body) {
const bodyContent = Buffer.from(args.request.body);
headers['Content-MD5'] = crypto.createHash('md5').update(bodyContent).digest('base64');
}
// DO spaces won't respond to 100-continue. not sure why. 76f365f7e8233efb335ba386a9f558b09238b08a has another way to delete this header
delete headers.Expect;
return await next(args);
};
function createS3Client(apiConfig, options) {
assert.strictEqual(typeof apiConfig, 'object');
assert.strictEqual(typeof options, 'object');
@@ -59,47 +90,33 @@ function createS3Client(apiConfig, options) {
secretAccessKey: apiConfig.secretAccessKey
};
class CustomHttpHandler extends NodeHttpHandler {
// AWS decided to use CRC32 for checksums. The Client SDK has then been changed to set this for requests as default. https://github.com/aws/aws-sdk-js-v3/blob/main/supplemental-docs/MD5_FALLBACK.md
// requestChecksumCalculation: "WHEN_REQUIRED", responseChecksumValidation: "WHEN_REQUIRED", "checksumAlgorithm": "md5" all don't work
// Non-AWS doesn't support this. It seems when there is a request body, the sdk always sends Expect 101-continue header before sending request body. DO Spaces just doesn't respond to this.
// see also: https://github.com/aws/aws-sdk-js-v3/issues/6810 https://github.com/aws/aws-sdk-js-v3/issues/6819 https://github.com/aws/aws-sdk-js-v3/issues/6761
#removeExpectHeader;
constructor(options) {
super(options);
this.#removeExpectHeader = options.removeExpectHeader;
if (options.http) this.agent = new http.Agent({});
if (options.acceptSelfSignedCerts) this.agent = new https.Agent({ rejectUnauthorized: false });
}
async handle(request, options) {
if (this.#removeExpectHeader && request.headers?.Expect) delete request.headers['Expect'];
return super.handle(request, options);
}
}
// aws s3 endpoint names come from the SDK
const isHttps = apiConfig.endpoint?.startsWith('https://') || apiConfig.provider === 's3';
const requestHandler = new NodeHttpHandler({
connectionTimeout: 60000,
socketTimeout: 20 * 60 * 1000
});
// sdk v3 only has signature support v4
const clientConfig = {
forcePathStyle: apiConfig.s3ForcePathStyle === true ? true : false, // Use vhost style instead of path style - https://forums.aws.amazon.com/ann.jspa?annID=6776
region: apiConfig.region || 'us-east-1',
credentials,
requestHandler: new CustomHttpHandler({
connectionTimeout: 60000,
socketTimeout: 20 * 60 * 1000,
removeExpectHeader: options.removeExpectHeader,
http: !isHttps,
acceptSelfSignedCerts: isHttps && (apiConfig.acceptSelfSignedCerts || apiConfig.bucket.includes('.'))
}),
requestHandler,
// logger: console
};
if (options.retryStrategy) clientConfig.retryStrategy = options.retryStrategy;
if (apiConfig.endpoint) clientConfig.endpoint = apiConfig.endpoint;
// s3 endpoint names come from the SDK
const isHttps = clientConfig.endpoint?.startsWith('https://') || apiConfig.provider === 's3';
if (isHttps) {
if (apiConfig.acceptSelfSignedCerts || apiConfig.bucket.includes('.')) {
requestHandler.agent = new https.Agent({ rejectUnauthorized: false });
}
} else { // http agent is required for http endpoints
requestHandler.agent = new http.Agent({});
}
const client = constants.TEST ? new globalThis.S3Mock(clientConfig) : new S3(clientConfig);
// https://github.com/aws/aws-sdk-js-v3/issues/6761#issuecomment-2574480834
// client.middlewareStack.add((next, context) => async (args) => {
@@ -115,6 +132,17 @@ function createS3Client(apiConfig, options) {
// override: true,
// });
// This ensures it runs after default checksums might be added, but before signing
if (options.deleteObjects && apiConfig.provider !== 's3') {
// flexibleChecksumsMiddleware is only present when the request has a body. Only use this for DeleteObjects call. Other requests without a body will crash
client.middlewareStack.addRelativeTo(md5Middleware, {
relation: 'after',
toMiddleware: 'flexibleChecksumsMiddleware',
name: 'addMD5ChecksumForDeleteObjects',
tags: ['MD5_FALLBACK'],
});
}
return client;
}
@@ -478,7 +506,8 @@ async function removeDir(apiConfig, pathPrefix, progressCallback) {
assert.strictEqual(typeof pathPrefix, 'string');
assert.strictEqual(typeof progressCallback, 'function');
const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY, removeExpectHeader: true });
// only use this client for DeleteObjects call. It forces md5 checksum and for anything else, it might crash
const deleteObjectsS3Client = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY, deleteObjects: true });
let total = 0;
let marker = null;
@@ -503,7 +532,7 @@ async function removeDir(apiConfig, pathPrefix, progressCallback) {
progressCallback({ message: `Removing ${objects.length} files from ${firstPath} to ${lastPath}` });
// deleteObjects does not return error if key is not found
const [error] = await safe(s3.deleteObjects(deleteParams));
const [error] = await safe(deleteObjectsS3Client.deleteObjects(deleteParams));
if (error) {
progressCallback({ message: `Unable to remove from ${firstPath} to ${lastPath} ${error.message || error.Code}` });
throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to remove from ${firstPath} to ${lastPath}. error: ${error.message}`);