diff --git a/src/storage/s3.js b/src/storage/s3.js index 4eab6b2c8..4bfa20d65 100644 --- a/src/storage/s3.js +++ b/src/storage/s3.js @@ -29,6 +29,7 @@ const assert = require('assert'), { ConfiguredRetryStrategy } = require('@smithy/util-retry'), constants = require('../constants.js'), consumers = require('node:stream/consumers'), + crypto = require('crypto'), debug = require('debug')('box:storage/s3'), http = require('http'), https = require('https'), @@ -50,6 +51,36 @@ function formatError(error) { const RETRY_STRATEGY = new ConfiguredRetryStrategy(10 /* max attempts */, (/* attempt */) => 20000 /* constant backoff */); +// AWS decided to use CRC32 for checksums. The Client SDK has then been changed to set this for requests as default. +// requestChecksumCalculation: "WHEN_REQUIRED", responseChecksumValidation: "WHEN_REQUIRED", "checksumAlgorithm": "md5" all don't work +// see also: https://github.com/aws/aws-sdk-js-v3/issues/6810 https://github.com/aws/aws-sdk-js-v3/issues/6819 https://github.com/aws/aws-sdk-js-v3/issues/6761 +// this implements https://github.com/aws/aws-sdk-js-v3/blob/main/supplemental-docs/MD5_FALLBACK.md +const md5Middleware = (next, context) => async (args) => { + const isDeleteObjects = context.commandName === 'DeleteObjectsCommand'; + + if (!isDeleteObjects) return next(args); + + const headers = args.request.headers; + + // Remove any checksum headers added by default middleware. This ensures our Content-MD5 is the primary integrity check + for (const header of Object.keys(headers)) { + const lowerHeader = header.toLowerCase(); + if (lowerHeader.startsWith('x-amz-checksum-') || lowerHeader.startsWith('x-amz-sdk-checksum-')) { + delete headers[header]; + } + } + + if (args.request.body) { + const bodyContent = Buffer.from(args.request.body); + headers['Content-MD5'] = crypto.createHash('md5').update(bodyContent).digest('base64'); + } + + // DO spaces won't respond to 100-continue. not sure why. 76f365f7e8233efb335ba386a9f558b09238b08a has another way to delete this header + delete headers.Expect; + + return await next(args); +}; + function createS3Client(apiConfig, options) { assert.strictEqual(typeof apiConfig, 'object'); assert.strictEqual(typeof options, 'object'); @@ -59,47 +90,33 @@ function createS3Client(apiConfig, options) { secretAccessKey: apiConfig.secretAccessKey }; - class CustomHttpHandler extends NodeHttpHandler { - // AWS decided to use CRC32 for checksums. The Client SDK has then been changed to set this for requests as default. https://github.com/aws/aws-sdk-js-v3/blob/main/supplemental-docs/MD5_FALLBACK.md - // requestChecksumCalculation: "WHEN_REQUIRED", responseChecksumValidation: "WHEN_REQUIRED", "checksumAlgorithm": "md5" all don't work - // Non-AWS doesn't support this. It seems when there is a request body, the sdk always sends Expect 101-continue header before sending request body. DO Spaces just doesn't respond to this. - // see also: https://github.com/aws/aws-sdk-js-v3/issues/6810 https://github.com/aws/aws-sdk-js-v3/issues/6819 https://github.com/aws/aws-sdk-js-v3/issues/6761 - #removeExpectHeader; - constructor(options) { - super(options); - this.#removeExpectHeader = options.removeExpectHeader; - - if (options.http) this.agent = new http.Agent({}); - if (options.acceptSelfSignedCerts) this.agent = new https.Agent({ rejectUnauthorized: false }); - } - - async handle(request, options) { - if (this.#removeExpectHeader && request.headers?.Expect) delete request.headers['Expect']; - return super.handle(request, options); - } - } - - // aws s3 endpoint names come from the SDK - const isHttps = apiConfig.endpoint?.startsWith('https://') || apiConfig.provider === 's3'; + const requestHandler = new NodeHttpHandler({ + connectionTimeout: 60000, + socketTimeout: 20 * 60 * 1000 + }); // sdk v3 only has signature support v4 const clientConfig = { forcePathStyle: apiConfig.s3ForcePathStyle === true ? true : false, // Use vhost style instead of path style - https://forums.aws.amazon.com/ann.jspa?annID=6776 region: apiConfig.region || 'us-east-1', credentials, - requestHandler: new CustomHttpHandler({ - connectionTimeout: 60000, - socketTimeout: 20 * 60 * 1000, - removeExpectHeader: options.removeExpectHeader, - http: !isHttps, - acceptSelfSignedCerts: isHttps && (apiConfig.acceptSelfSignedCerts || apiConfig.bucket.includes('.')) - }), + requestHandler, // logger: console }; if (options.retryStrategy) clientConfig.retryStrategy = options.retryStrategy; if (apiConfig.endpoint) clientConfig.endpoint = apiConfig.endpoint; + // s3 endpoint names come from the SDK + const isHttps = clientConfig.endpoint?.startsWith('https://') || apiConfig.provider === 's3'; + if (isHttps) { + if (apiConfig.acceptSelfSignedCerts || apiConfig.bucket.includes('.')) { + requestHandler.agent = new https.Agent({ rejectUnauthorized: false }); + } + } else { // http agent is required for http endpoints + requestHandler.agent = new http.Agent({}); + } + const client = constants.TEST ? new globalThis.S3Mock(clientConfig) : new S3(clientConfig); // https://github.com/aws/aws-sdk-js-v3/issues/6761#issuecomment-2574480834 // client.middlewareStack.add((next, context) => async (args) => { @@ -115,6 +132,17 @@ function createS3Client(apiConfig, options) { // override: true, // }); + // This ensures it runs after default checksums might be added, but before signing + if (options.deleteObjects && apiConfig.provider !== 's3') { + // flexibleChecksumsMiddleware is only present when the request has a body. Only use this for DeleteObjects call. Other requests without a body will crash + client.middlewareStack.addRelativeTo(md5Middleware, { + relation: 'after', + toMiddleware: 'flexibleChecksumsMiddleware', + name: 'addMD5ChecksumForDeleteObjects', + tags: ['MD5_FALLBACK'], + }); + } + return client; } @@ -478,7 +506,8 @@ async function removeDir(apiConfig, pathPrefix, progressCallback) { assert.strictEqual(typeof pathPrefix, 'string'); assert.strictEqual(typeof progressCallback, 'function'); - const s3 = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY, removeExpectHeader: true }); + // only use this client for DeleteObjects call. It forces md5 checksum and for anything else, it might crash + const deleteObjectsS3Client = createS3Client(apiConfig, { retryStrategy: RETRY_STRATEGY, deleteObjects: true }); let total = 0; let marker = null; @@ -503,7 +532,7 @@ async function removeDir(apiConfig, pathPrefix, progressCallback) { progressCallback({ message: `Removing ${objects.length} files from ${firstPath} to ${lastPath}` }); // deleteObjects does not return error if key is not found - const [error] = await safe(s3.deleteObjects(deleteParams)); + const [error] = await safe(deleteObjectsS3Client.deleteObjects(deleteParams)); if (error) { progressCallback({ message: `Unable to remove from ${firstPath} to ${lastPath} ${error.message || error.Code}` }); throw new BoxError(BoxError.EXTERNAL_ERROR, `Unable to remove from ${firstPath} to ${lastPath}. error: ${error.message}`);