Skip to content

Commit

Permalink
feat!: Flexible checksum v2 (#1803)
Browse files Browse the repository at this point in the history
* Add requestChecksumCalculation and responseChecksumValidation configs & AWSChecksumCalculationMode enum type for both config options.

* Add value resolvers for requestChecksumCalculation and responseChecksumValidation config options.

* Add requestChecksumCalculation and responseChecksumValidation configs to codegen side; add type info for AWSChecksumCalculationMode to codegen side.

* Add context extension for getting and setting the new config options; use this in setter codegen.

* Clean up flexchex request middleware conditionals into one logical flow and add in default algorithm selection logic.

* Fix validation mode logic in flexchex response middleware; now it uses user input as it's supposed to.

* Update flexchex request middleware codegen to pass in request checksum required flag to initializer.

* Update runtime tests for flexchex middlewares + add a test for no request checksum calculation flow.

* Update codegen tests.

* Add CRC64NVME as one of the algorithms to check for in flexchex response middleware.

* Add test case for no response validation when validation mode unset and responseChecksumValidation config is .whenRequired.

* Address compile time errors in generated code.

* Update codegen test

* Skip checksum flow if body is empty + ignore checksum of checksums that end with -#.

* Add edge case handling for a stream body with size below chunked threshold, hence checksum header must be sent in original request rather than in trailing header.

* Add business metric feature ID tracking for flexible checksum v2.

* Update initializer call in test.

* Reflect context thread-safe changes upstream.

* Fill impl gaps against SEP.

* Update codegen test & fix optional chaining.

* Fill unit test gap for flex checksum middlewares

* Temporarily comment out manual fill for requestAlgorithmMember http header.

* Add PRESIGN_URL flow to flexchex request middleware.

* Update comments in FlexibleChecksumsRequestMiddleware, fix composite checksum ignore logic in FlexibleChecksumsResponseMiddleware, and add a new integration test for using presigned URLs with S3::PutObject.

* Add integration tests for default checksum flows with data and stream payloads. Fix checksum algorithm header insertion to insert only when body is non-empty by adding early exit condition at beginning of middleware.

* Address swiftlint warnings + misc. comment changes.

* Response algorithms modeled for the operation via httpChecksum trait need to be respected.

* Update codegen test

* Bump minor version by 1; changes next release version from 1.0.79 to 1.1.0.

---------

Co-authored-by: Sichan Yoo <[email protected]>
  • Loading branch information
sichanyoo and Sichan Yoo authored Jan 14, 2025
1 parent ebdf969 commit 906d432
Show file tree
Hide file tree
Showing 25 changed files with 614 additions and 141 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ private var runtimeTargets: [Target] {
.smithyEventStreamsAuthAPI,
.awsSDKCommon,
.awsSDKHTTPAuth,
.awsSDKIdentity
.awsSDKIdentity,
.awsSDKChecksums,
],
path: "Sources/Core/AWSClientRuntime/Sources/AWSClientRuntime",
resources: [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ final class S3FlexibleChecksumsTests: S3XCTestCase {

// MARK: - Data uploads

func test_putGetObject_data_default_algorithm() async throws {
    // No algorithm is configured here; the helper then expects a CRC32 checksum in the response.
    try await _testPutGetObject(
        withChecksumAlgorithm: nil,
        objectNameSuffix: "default-crc32-data",
        upload: .data(originalData)
    )
}

func test_putGetObject_data_crc32() async throws {
    // In-memory upload with CRC32 explicitly selected as the checksum algorithm.
    try await _testPutGetObject(
        withChecksumAlgorithm: .crc32,
        objectNameSuffix: "crc32-data",
        upload: .data(originalData)
    )
}
Expand All @@ -43,6 +48,12 @@ final class S3FlexibleChecksumsTests: S3XCTestCase {

// MARK: - Streaming uploads

func test_putGetObject_streaming_default_algorithm() async throws {
    let bufferedStream = BufferedStream(data: originalData, isClosed: true)
    // No algorithm is configured here; the helper then expects a CRC32 checksum in the response.
    // The suffix follows the streaming tests' convention (no "-data") so this test does not
    // reuse the object key written by test_putGetObject_data_default_algorithm.
    try await _testPutGetObject(
        withChecksumAlgorithm: nil,
        objectNameSuffix: "default-crc32",
        upload: .stream(bufferedStream)
    )
}

func test_putGetObject_streaming_crc32() async throws {
let bufferedStream = BufferedStream(data: originalData, isClosed: true)
try await _testPutGetObject(withChecksumAlgorithm: .crc32, objectNameSuffix: "crc32", upload: .stream(bufferedStream))
Expand Down Expand Up @@ -129,7 +140,7 @@ final class S3FlexibleChecksumsTests: S3XCTestCase {
// MARK: - Private methods

private func _testPutGetObject(
withChecksumAlgorithm algorithm: S3ClientTypes.ChecksumAlgorithm,
withChecksumAlgorithm algorithm: S3ClientTypes.ChecksumAlgorithm?,
objectNameSuffix: String, upload: ByteStream, file: StaticString = #filePath, line: UInt = #line
) async throws {
let objectName = "flexible-checksums-s3-test-\(objectNameSuffix)"
Expand All @@ -143,8 +154,8 @@ final class S3FlexibleChecksumsTests: S3XCTestCase {

let output = try await client.putObject(input: input)

// Verify the checksum response based on the algorithm used.
let checksumResponse = try XCTUnwrap(getChecksumResponse(from: output, with: algorithm), file: file, line: line)
// Verify the checksum response based on the algorithm used; if algorithm was nil, crc32 should've been used so check for crc32.
let checksumResponse = try XCTUnwrap(getChecksumResponse(from: output, with: algorithm ?? .crc32), file: file, line: line)
XCTAssertNotNil(checksumResponse, file: file, line: line)

let getInput = GetObjectInput(bucket: bucketName, checksumMode: S3ClientTypes.ChecksumMode.enabled, key: objectName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,29 @@ class S3PresignedURLTests: S3XCTestCase {
XCTAssertNotNil(components?.queryItems?.first(where: { $0.name == "IfMatch" && $0.value == originalIfMatch }))
XCTAssertNotNil(components?.queryItems?.first(where: { $0.name == "IfNoneMatch" && $0.value == originalIfNoneMatch }))
}

func test_putObject_putsObjectWithPresignedURL() async throws {
    // Arrange: a random payload and key, and a presigned PutObject URL for them.
    // The payload is converted to Data once so the presigned input, the uploaded body,
    // and the final comparison all use the exact same bytes.
    let payload = Data(UUID().uuidString.utf8)
    let key = UUID().uuidString
    let input = PutObjectInput(body: .data(payload), bucket: bucketName, key: key)
    let url = try await client.presignedURLForPutObject(input: input, expiration: 600)

    // Act: upload the payload with a plain HTTP PUT against the presigned URL.
    var request = URLRequest(url: url)
    request.httpMethod = "PUT"
    request.httpBody = payload

    _ = try await perform(urlRequest: request)

    // Assert: the object fetched through the SDK client matches what was uploaded.
    let getObjResult = try await client.getObject(input: GetObjectInput(bucket: bucketName, key: key))

    guard let fetchedData = try await getObjResult.body?.readData() else {
        throw GetObjectError.failedToGetObject
    }

    XCTAssertEqual(payload, fetchedData)
}

/// Error thrown by the presigned-URL put test when the fetched object cannot be read back.
enum GetObjectError: Error {
/// The `getObject` result had no body, or reading the body produced no data.
case failedToGetObject
}
}
2 changes: 1 addition & 1 deletion Package.version.next
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.79
1.1.0
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import enum ClientRuntime.ClientLogMode
import struct SmithyRetries.DefaultRetryStrategy
import struct SmithyRetries.ExponentialBackoffStrategy
import struct SmithyRetriesAPI.RetryStrategyOptions
import enum AWSSDKChecksums.AWSChecksumCalculationMode

typealias RuntimeConfigType = DefaultSDKRuntimeConfiguration<DefaultRetryStrategy, DefaultRetryErrorInfoProvider>

Expand Down Expand Up @@ -84,6 +85,40 @@ public class AWSClientConfigDefaultsProvider {
return resolvedAppID
}

/// Resolves the request checksum calculation mode for a client configuration.
///
/// - Parameter requestChecksumCalculation: An explicitly chosen mode. When non-nil it is returned as is.
/// - Returns: The explicit mode if given; otherwise the value produced by
///   `AWSChecksumsConfig.requestChecksumCalculation(configValue:profileName:fileBasedConfig:)`.
/// - Throws: Any error thrown by `CRTFileBasedConfiguration.make()`.
public static func requestChecksumCalculation(
    _ requestChecksumCalculation: AWSChecksumCalculationMode? = nil
) throws -> AWSChecksumCalculationMode {
    // The file-based configuration is created before the explicit value is inspected,
    // so a failure here propagates even when an explicit mode was supplied.
    let sharedConfig = try CRTFileBasedConfiguration.make()

    if let explicitMode = requestChecksumCalculation {
        return explicitMode
    }

    return AWSChecksumsConfig.requestChecksumCalculation(
        configValue: nil,
        profileName: nil,
        fileBasedConfig: sharedConfig
    )
}

/// Resolves the response checksum validation mode for a client configuration.
///
/// - Parameter responseChecksumValidation: An explicitly chosen mode. When non-nil it is returned as is.
/// - Returns: The explicit mode if given; otherwise the value produced by
///   `AWSChecksumsConfig.responseChecksumValidation(configValue:profileName:fileBasedConfig:)`.
/// - Throws: Any error thrown by `CRTFileBasedConfiguration.make()`.
public static func responseChecksumValidation(
    _ responseChecksumValidation: AWSChecksumCalculationMode? = nil
) throws -> AWSChecksumCalculationMode {
    // The file-based configuration is created before the explicit value is inspected,
    // so a failure here propagates even when an explicit mode was supplied.
    let sharedConfig = try CRTFileBasedConfiguration.make()

    if let explicitMode = responseChecksumValidation {
        return explicitMode
    }

    return AWSChecksumsConfig.responseChecksumValidation(
        configValue: nil,
        profileName: nil,
        fileBasedConfig: sharedConfig
    )
}

public static func retryMode(_ retryMode: AWSRetryMode? = nil) throws -> AWSRetryMode {
let fileBasedConfig = try CRTFileBasedConfiguration.make()
let resolvedRetryMode: AWSRetryMode?
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//
// Copyright Amazon.com Inc. or its affiliates.
// All Rights Reserved.
//
// SPDX-License-Identifier: Apache-2.0
//

import enum AWSSDKChecksums.AWSChecksumCalculationMode
@_spi(FileBasedConfig) import AWSSDKCommon

/// Namespace for resolving the flexible-checksum client settings
/// (`requestChecksumCalculation` and `responseChecksumValidation`).
public enum AWSChecksumsConfig {
    /// Resolves the request checksum calculation mode.
    ///
    /// The candidate sources handed to `FieldResolver` are `configValue`, the environment variable
    /// `AWS_REQUEST_CHECKSUM_CALCULATION`, and the `request_checksum_calculation` config field.
    /// Precedence between them is decided by `FieldResolver`, which is not defined in this file.
    ///
    /// - Returns: The resolved mode, or `.whenSupported` when the resolver yields no value.
    static func requestChecksumCalculation(
        configValue: AWSChecksumCalculationMode?,
        profileName: String?,
        fileBasedConfig: FileBasedConfiguration
    ) -> AWSChecksumCalculationMode {
        return resolveMode(
            configValue: configValue,
            envVarName: "AWS_REQUEST_CHECKSUM_CALCULATION",
            configFieldName: "request_checksum_calculation",
            profileName: profileName,
            fileBasedConfig: fileBasedConfig
        )
    }

    /// Resolves the response checksum validation mode.
    ///
    /// The candidate sources handed to `FieldResolver` are `configValue`, the environment variable
    /// `AWS_RESPONSE_CHECKSUM_VALIDATION`, and the `response_checksum_validation` config field.
    /// Precedence between them is decided by `FieldResolver`, which is not defined in this file.
    ///
    /// - Returns: The resolved mode, or `.whenSupported` when the resolver yields no value.
    static func responseChecksumValidation(
        configValue: AWSChecksumCalculationMode?,
        profileName: String?,
        fileBasedConfig: FileBasedConfiguration
    ) -> AWSChecksumCalculationMode {
        return resolveMode(
            configValue: configValue,
            envVarName: "AWS_RESPONSE_CHECKSUM_VALIDATION",
            configFieldName: "response_checksum_validation",
            profileName: profileName,
            fileBasedConfig: fileBasedConfig
        )
    }

    /// Shared resolution path for both settings: builds a `FieldResolver` whose converter parses
    /// raw strings case-insensitively, then falls back to `.whenSupported` if nothing resolves.
    private static func resolveMode(
        configValue: AWSChecksumCalculationMode?,
        envVarName: String,
        configFieldName: String,
        profileName: String?,
        fileBasedConfig: FileBasedConfiguration
    ) -> AWSChecksumCalculationMode {
        let resolver = FieldResolver(
            configValue: configValue,
            envVarName: envVarName,
            configFieldName: configFieldName,
            fileBasedConfig: fileBasedConfig,
            profileName: profileName,
            converter: { AWSChecksumCalculationMode(caseInsensitiveRawValue: $0) }
        )
        return resolver.value ?? .whenSupported
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import SmithyIdentity
import SmithyIdentityAPI
import enum AWSSDKChecksums.AWSChecksumCalculationMode

public protocol AWSDefaultClientConfiguration {
/// The AWS credential identity resolver to be used for AWS credentials.
Expand Down Expand Up @@ -46,6 +47,24 @@ public protocol AWSDefaultClientConfiguration {
/// If set, this value gets used when resolving max attempts value from the standard progression of potential sources. If no value could be resolved, the SDK uses max attempts value of 3 by default.
var maxAttempts: Int? { get set }

/// The AWS request checksum calculation mode to use.
///
/// If `.whenRequired`, the client calculates checksum for the request payload only if the operation requires it.
/// If `.whenSupported`, the client calculates checksum for the request payload if the operation supports it.
///
/// Default mode is `.whenSupported`.
///
/// If no algorithm was chosen and no checksum was provided, CRC32 checksum algorithm is used by default.
var requestChecksumCalculation: AWSChecksumCalculationMode { get set }

/// The AWS response checksum validation mode to use.
///
/// If `.whenRequired`, the client validates checksum of the response only if the top-level input field for `requestValidationModeMember` is set to `.enabled` and SDK supports the checksum algorithm.
/// If `.whenSupported`, the client validates checksum of the response if the operation supports it and SDK supports at least one of the checksum algorithms returned by service.
///
/// Default mode is `.whenSupported`.
var responseChecksumValidation: AWSChecksumCalculationMode { get set }

/// Specifies whether the endpoint configured via environment variables or shared config file should be used by the service client.
///
/// If `false`, the endpoint for the service client is resolved in the following order:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import enum SmithyChecksumsAPI.ChecksumAlgorithm
import enum SmithyChecksums.ChecksumMismatchException
import enum Smithy.ClientError
import struct Smithy.URIQueryItem
import class Smithy.Context
import struct Foundation.Data
import AwsCommonRuntimeKit
import AWSSDKChecksums
import ClientRuntime
Expand All @@ -18,76 +20,136 @@ public struct FlexibleChecksumsRequestMiddleware<OperationStackInput, OperationS

public let id: String = "FlexibleChecksumsRequestMiddleware"

let requestChecksumRequired: Bool
let checksumAlgorithm: String?

public init(checksumAlgorithm: String?) {
let checksumAlgoHeaderName: String?

/// Creates the request-side flexible checksums middleware.
///
/// - Parameters:
///   - requestChecksumRequired: When `true` and no algorithm was chosen, the middleware
///     defaults to CRC32 instead of skipping checksum calculation.
///   - checksumAlgorithm: Name of the checksum algorithm chosen by the user via the operation's
///     checksum algorithm input member, or `nil` if none was chosen. An unsupported name makes
///     the middleware throw `ClientError.invalidValue`.
///   - checksumAlgoHeaderName: Name of the HTTP header that carries the algorithm name, if any.
///     It is excluded when checking for user-provided `x-amz-checksum-*` headers and is set to
///     `"crc32"` when the default algorithm is used.
public init(
requestChecksumRequired: Bool,
checksumAlgorithm: String?,
checksumAlgoHeaderName: String?
) {
self.requestChecksumRequired = requestChecksumRequired
self.checksumAlgorithm = checksumAlgorithm
self.checksumAlgoHeaderName = checksumAlgoHeaderName
}

private func addHeaders(builder: HTTPRequestBuilder, attributes: Context) async throws {
if case(.stream(let stream)) = builder.body {
attributes.isChunkedEligibleStream = stream.isEligibleForChunkedStreaming
if stream.isEligibleForChunkedStreaming {
try builder.setAwsChunkedHeaders() // x-amz-decoded-content-length
}
}

// Initialize logger
guard let logger = attributes.getLogger() else {
throw ClientError.unknownError("No logger found!")
}

guard let checksumString = checksumAlgorithm else {
logger.info("No checksum provided! Skipping flexible checksums workflow...")
if case(.stream(let stream)) = builder.body {
attributes.isChunkedEligibleStream = stream.isEligibleForChunkedStreaming
if stream.isEligibleForChunkedStreaming {
try builder.setAwsChunkedHeaders() // x-amz-decoded-content-length
}
} else if case(.noStream) = builder.body {
logger.info("Request body is empty. Skipping request checksum calculation...")
return
}

guard let checksumHashFunction = ChecksumAlgorithm.from(string: checksumString) else {
logger.info("Found no supported checksums! Skipping flexible checksums workflow...")
// E.g., prefix for x-amz-checksum-crc32
let checksumHeaderPrefix = "x-amz-checksum-"

if attributes.getFlowType() == .PRESIGN_URL {
// Skip default request checksum calculation logic for PRESIGN_URL flow.
return
}

// Determine the header name
let headerName = "x-amz-checksum-\(checksumHashFunction)"
logger.debug("Resolved checksum header name: \(headerName)")

// Check if any checksum header is already provided by the user
let checksumHeaderPrefix = "x-amz-checksum-"
if builder.headers.headers.contains(where: {
$0.name.lowercased().starts(with: checksumHeaderPrefix) &&
$0.name.lowercased() != "x-amz-checksum-algorithm"
$0.name.lowercased() != checksumAlgoHeaderName?.lowercased()
}) {
logger.debug("Checksum header already provided by the user. Skipping calculation.")
return
}

var checksumHashFunction: ChecksumAlgorithm
if let checksumAlgorithm {
// If checksum algorithm to use was configured via checksum algorithm input member by the user
if let hashFunction = ChecksumAlgorithm.from(string: checksumAlgorithm) {
// If user chose a supported algorithm, continue
checksumHashFunction = hashFunction
} else {
// If user chose an unsupported algorithm, throw error
throw ClientError.invalidValue("Error: Checksum algorithm \(checksumAlgorithm) is not supported!")
}
} else {
// If user didn't choose an algorithm via checksum algorithm input member, then:
if requestChecksumRequired || (attributes.requestChecksumCalculation == .whenSupported) {
// If requestChecksumRequired == true OR RequestChecksumCalculation == when_supported, use CRC32 as default algorithm.
checksumHashFunction = ChecksumAlgorithm.from(string: "crc32")!
logger.info("No algorithm chosen by user. Defaulting to CRC32 checksum algorithm.")
// If the input member tied to `requestAlgorithmMember` has `@httpHeader` trait in model,
// manually set the header with the name from `@httpHeader` trait with SDK's default algorithm: CRC32.
// This needs to be manually added here because user didn't configure checksumAlgorithm but we're sending default checksum.
// If user did set checksumAlgorithm in input, it would've been automatically added to request as a header in serialize step.
if let checksumAlgoHeaderName {
builder.updateHeader(name: checksumAlgoHeaderName, value: "crc32")
}
} else {
// If requestChecksumRequired == false AND RequestChecksumCalculation == when_required, skip calculation.
logger.info("Checksum not required for the operation.")
logger.info("Client config `requestChecksumCalculation` set to `.whenRequired`")
logger.info("No checksum algorithm chosen by the user. Skipping checksum calculation...")
return
}
}

// Save resolved ChecksumAlgorithm to interceptor context.
attributes.checksum = checksumHashFunction

// Determine the checksum header name
let checksumHashHeaderName = "x-amz-checksum-\(checksumHashFunction)"
logger.debug("Resolved checksum header name: \(checksumHashHeaderName)")

// Handle body vs handle stream
switch builder.body {
case .data(let data):
guard let data else {
throw ClientError.dataNotFound("Cannot calculate checksum of empty body!")
try await calculateAndAddChecksumHeader(data: data)
case .stream(let stream):
if stream.isEligibleForChunkedStreaming {
// Handle calculating and adding checksum header in ChunkedStream
builder.updateHeader(name: "x-amz-trailer", value: [checksumHashHeaderName])
} else {
// If not eligible for chunked streaming, calculate and add checksum to request header now instead of as a trailing header.
let streamBytes: Data?
if stream.isSeekable {
// Need to save current position to reset stream position after reading
let currentPosition = stream.position
try stream.seek(toOffset: 0) // Explicit seek to beginning for correct behavior of FileHandle
streamBytes = try stream.readToEnd()
// Reset stream position to where it was before reading it for checksum calculation
try stream.seek(toOffset: currentPosition)
} else {
streamBytes = try await stream.readToEndAsync()
builder.withBody(.data(streamBytes)) // Reset request body with streamBytes to "refill" it
}
try await calculateAndAddChecksumHeader(data: streamBytes)
}
case .noStream:
// Unreachable block since we return early if .noStream, but it's here for exhaustive switch case
break
}

if builder.headers.value(for: headerName) == nil {
logger.debug("Calculating checksum")
func calculateAndAddChecksumHeader(data: Data?) async throws {
guard let data else {
logger.info("Request body is empty. Skipping request checksum calculation...")
return
}
if builder.headers.value(for: checksumHashHeaderName) == nil {
logger.debug("Calculating request checksum")
}

// Create checksum instance
let checksum = checksumHashFunction.createChecksum()

// Pass data to hash
try checksum.update(chunk: data)

// Retrieve the hash
let hash = try checksum.digest().toBase64String()

builder.updateHeader(name: headerName, value: [hash])
case .stream:
// Will handle calculating checksum and setting header later
attributes.checksum = checksumHashFunction
builder.updateHeader(name: "x-amz-trailer", value: [headerName])
case .noStream:
throw ClientError.dataNotFound("Cannot calculate the checksum of an empty body!")
builder.updateHeader(name: checksumHashHeaderName, value: [hash])
}
}
}
Expand Down
Loading

0 comments on commit 906d432

Please sign in to comment.