diff --git a/pyproject.toml b/pyproject.toml index 2f271caf5..cb0aaf908 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "requests>=2.32.3", "sentry-sdk>=2.18.0", "sqlalchemy<2", + "zstandard==0.23.0", ] [build-system] diff --git a/shared/storage/minio.py b/shared/storage/minio.py index bb65c5d5b..bb0002412 100644 --- a/shared/storage/minio.py +++ b/shared/storage/minio.py @@ -1,13 +1,15 @@ -import gzip +import datetime import json import logging import os -import shutil import sys import tempfile from io import BytesIO -from typing import BinaryIO, overload +from typing import BinaryIO, Protocol, overload +import sentry_sdk +import sentry_sdk.scope +import zstandard from minio import Minio from minio.credentials import ( ChainedProvider, @@ -17,13 +19,29 @@ ) from minio.deleteobjects import DeleteObject from minio.error import MinioException, S3Error +from urllib3.response import HTTPResponse -from shared.storage.base import CHUNK_SIZE, BaseStorageService +from shared.storage.base import BaseStorageService from shared.storage.exceptions import BucketAlreadyExistsError, FileNotInStorageError log = logging.getLogger(__name__) +class Readable(Protocol): + def read(self, size: int = -1) -> bytes: ... + + +class GetObjectToFileResponse(Protocol): + bucket_name: str + object_name: str + last_modified: datetime.datetime | None + etag: str + size: int + content_type: str | None + metadata: dict[str, str] + version_id: str | None + + # Service class for interfacing with codecov's underlying storage layer, minio class MinioStorageService(BaseStorageService): def __init__(self, minio_config): @@ -57,20 +75,21 @@ def init_minio_client( region: str = None, ): """ - Initialize the minio client + Initialize the minio client `iam_auth` adds support for IAM base authentication in a fallback pattern. - The following will be checked in order: + The following will be checked in order: * EC2 metadata -- a custom endpoint can be provided, default is None. - * AWS env vars, specifically AWS_ACCESS_KEY and AWS_SECRECT_KEY * Minio env vars, specifically MINIO_ACCESS_KEY and MINIO_SECRET_KEY + * AWS env vars, specifically AWS_ACCESS_KEY and AWS_SECRECT_KEY - to support backward compatibility, the iam_auth setting should be used in the installation - configuration + to support backward compatibility, the iam_auth setting should be used + in the installation configuration Args: host (str): The address of the host where minio lives + port (str): The port number (as str or int should be ok) access_key (str, optional): The access key (optional if IAM is being used) secret_key (str, optional): The secret key (optional if IAM is being used) @@ -143,50 +162,64 @@ def create_root_storage(self, bucket_name="archive", region="us-east-1"): # Writes a file to storage will gzip if not compressed already def write_file( self, - bucket_name, - path, - data, - reduced_redundancy=False, + bucket_name: str, + path: str, + data: BinaryIO, + reduced_redundancy: bool = False, *, - is_already_gzipped: bool = False, + is_already_gzipped: bool = False, # deprecated + is_compressed: bool = False, + compression_type: str = "zstd", ): + if is_already_gzipped: + log.warning( + "is_already_gzipped is deprecated and will be removed in a future version, instead compress using zstd and use the is_already_zstd_compressed argument" + ) + with sentry_sdk.new_scope() as scope: + scope.set_extra("bucket_name", bucket_name) + scope.set_extra("path", path) + sentry_sdk.capture_message("is_already_gzipped passed with True") + is_compressed = True + compression_type = "gzip" + if isinstance(data, str): - data = data.encode() + log.warning( + "passing data as a str to write_file is deprecated and will be removed in a future version, instead pass an object compliant with the BinaryIO type" + ) + with sentry_sdk.new_scope() as scope: + scope.set_extra("bucket_name", bucket_name) + scope.set_extra("path", path) + sentry_sdk.capture_message("write_file data argument passed as str") - if isinstance(data, bytes): - if not is_already_gzipped: - out = BytesIO() - with gzip.GzipFile(fileobj=out, mode="w", compresslevel=9) as gz: - gz.write(data) - else: - out = BytesIO(data) - - # get file size - out.seek(0, os.SEEK_END) - out_size = out.tell() - else: - # data is already a file-like object - if not is_already_gzipped: - _, filename = tempfile.mkstemp() - with gzip.open(filename, "wb") as f: - shutil.copyfileobj(data, f) - out = open(filename, "rb") - else: - out = data + data = BytesIO(data.encode()) - out_size = os.stat(filename).st_size + if not is_compressed: + cctx = zstandard.ZstdCompressor() + reader: zstandard.ZstdCompressionReader = cctx.stream_reader(data) + _, filepath = tempfile.mkstemp() + with open(filepath, "wb") as f: + while chunk := reader.read(16384): + f.write(chunk) + data = open(filepath, "rb") try: - # reset pos for minio reading. - out.seek(0) + out_size = data.seek(0, os.SEEK_END) + data.seek(0) + + if compression_type == "gzip": + content_encoding = "gzip" + elif compression_type == "zstd": + content_encoding = "zstd" + + headers = {"Content-Encoding": content_encoding} - headers = {"Content-Encoding": "gzip"} if reduced_redundancy: headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY" + self.minio_client.put_object( bucket_name, path, - out, + data, out_size, metadata=headers, content_type="text/plain", @@ -195,25 +228,65 @@ def write_file( except MinioException: raise + finally: + if not is_compressed: + data.close() + os.unlink(filepath) @overload - def read_file(self, bucket_name: str, path: str) -> bytes: ... + def read_file( + self, bucket_name: str, path: str, file_obj: None = None + ) -> bytes: ... @overload - def read_file(self, bucket_name: str, path: str, file_obj: BinaryIO) -> None: ... + def read_file(self, bucket_name: str, path: str, file_obj: str) -> None: ... def read_file(self, bucket_name, path, file_obj=None) -> bytes | None: try: - res = self.minio_client.get_object(bucket_name, path) - if file_obj is None: - data = BytesIO() - for d in res.stream(CHUNK_SIZE): - data.write(d) - data.seek(0) - return data.getvalue() + headers = {"Accept-Encoding": "gzip, zstd"} + if file_obj: + _, tmpfilepath = tempfile.mkstemp() + to_file_response: GetObjectToFileResponse = ( + self.minio_client.fget_object( + bucket_name, path, tmpfilepath, request_headers=headers + ) + ) + data = open(tmpfilepath, "rb") + content_encoding = to_file_response.metadata.get( + "Content-Encoding", None + ) + else: + response: HTTPResponse = self.minio_client.get_object( + bucket_name, path, request_headers=headers + ) + data = response + content_encoding = response.headers.get("Content-Encoding", None) + + reader: Readable | None = None + if content_encoding == "gzip": + # HTTPResponse automatically decodes gzipped data for us + # minio_client.fget_object uses HTTPResponse under the hood, + # so this applies to both get_object and fget_object + reader = data + elif content_encoding == "zstd": + # we have to manually decompress zstandard compressed data + cctx = zstandard.ZstdDecompressor() + reader = cctx.stream_reader(data) + else: + with sentry_sdk.new_scope() as scope: + scope.set_extra("bucket_name", bucket_name) + scope.set_extra("path", path) + raise ValueError("Blob does not have Content-Encoding set") + + if file_obj: + while chunk := reader.read(16384): + file_obj.write(chunk) + return None else: - for d in res.stream(CHUNK_SIZE): - file_obj.write(d) + res = BytesIO() + while chunk := reader.read(16384): + res.write(chunk) + return res.getvalue() except S3Error as e: if e.code == "NoSuchKey": raise FileNotInStorageError( @@ -222,6 +295,10 @@ def read_file(self, bucket_name, path, file_obj=None) -> bytes | None: raise e except MinioException: raise + finally: + if file_obj: + data.close() + os.unlink(tmpfilepath) """ Deletes file url in specified bucket. diff --git a/tests/unit/storage/__init__.py b/tests/unit/storage/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/storage/test_minio.py b/tests/unit/storage/test_minio.py index 50fe478db..00a32104f 100644 --- a/tests/unit/storage/test_minio.py +++ b/tests/unit/storage/test_minio.py @@ -1,231 +1,310 @@ -import os +import gzip import tempfile +from io import BytesIO +from uuid import uuid4 import pytest +import zstandard from shared.storage.exceptions import BucketAlreadyExistsError, FileNotInStorageError from shared.storage.minio import MinioStorageService -from tests.base import BaseTestCase - -minio_config = { - "access_key_id": "codecov-default-key", - "secret_access_key": "codecov-default-secret", - "verify_ssl": False, - "host": "minio", - "port": "9000", - "iam_auth": False, - "iam_endpoint": None, -} - - -class TestMinioStorageService(BaseTestCase): - def test_create_bucket(self, codecov_vcr): - storage = MinioStorageService(minio_config) - bucket_name = "archivetest" - res = storage.create_root_storage(bucket_name, region="") - assert res == {"name": "archivetest"} - - def test_create_bucket_already_exists(self, codecov_vcr): - storage = MinioStorageService(minio_config) - bucket_name = "alreadyexists" - storage.create_root_storage(bucket_name) - with pytest.raises(BucketAlreadyExistsError): - storage.create_root_storage(bucket_name) - - def test_write_then_read_file(self, codecov_vcr): - storage = MinioStorageService(minio_config) - path = "test_write_then_read_file/result" - data = "lorem ipsum dolor test_write_then_read_file á" - bucket_name = "archivetest" - writing_result = storage.write_file(bucket_name, path, data) - assert writing_result - reading_result = storage.read_file(bucket_name, path) - assert reading_result.decode() == data - - def test_write_then_read_file_obj(self, codecov_vcr): - storage = MinioStorageService(minio_config) - path = "test_write_then_read_file/result" - data = "lorem ipsum dolor test_write_then_read_file á" - _, local_path = tempfile.mkstemp() - with open(local_path, "w") as f: - f.write(data) - f = open(local_path, "rb") - bucket_name = "archivetest" - writing_result = storage.write_file(bucket_name, path, f) - assert writing_result - - _, local_path = tempfile.mkstemp() - with open(local_path, "wb") as f: - storage.read_file(bucket_name, path, file_obj=f) - with open(local_path, "rb") as f: - assert f.read().decode() == data - - def test_read_file_does_not_exist(self, request, codecov_vcr): - storage = MinioStorageService(minio_config) - path = f"{request.node.name}/does_not_exist.txt" - bucket_name = "archivetest" - with pytest.raises(FileNotInStorageError): - storage.read_file(bucket_name, path) - - def test_write_then_delete_file(self, request, codecov_vcr): - storage = MinioStorageService(minio_config) - path = f"{request.node.name}/result.txt" - data = "lorem ipsum dolor test_write_then_read_file á" - bucket_name = "archivetest" - writing_result = storage.write_file(bucket_name, path, data) - assert writing_result - deletion_result = storage.delete_file(bucket_name, path) - assert deletion_result is True - with pytest.raises(FileNotInStorageError): - storage.read_file(bucket_name, path) - - def test_delete_file_doesnt_exist(self, request, codecov_vcr): - storage = MinioStorageService(minio_config) - path = f"{request.node.name}/result.txt" - bucket_name = "archivetest" - storage.delete_file(bucket_name, path) - - def test_batch_delete_files(self, request, codecov_vcr): - storage = MinioStorageService(minio_config) - path_1 = f"{request.node.name}/result_1.txt" - path_2 = f"{request.node.name}/result_2.txt" - path_3 = f"{request.node.name}/result_3.txt" - paths = [path_1, path_2, path_3] - data = "lorem ipsum dolor test_write_then_read_file á" - bucket_name = "archivetest" - storage.write_file(bucket_name, path_1, data) - storage.write_file(bucket_name, path_3, data) - deletion_result = storage.delete_files(bucket_name, paths) - assert deletion_result == [True, True, True] - for p in paths: - with pytest.raises(FileNotInStorageError): - storage.read_file(bucket_name, p) - - def test_list_folder_contents(self, request, codecov_vcr): - storage = MinioStorageService(minio_config) - path_1 = f"thiago/{request.node.name}/result_1.txt" - path_2 = f"thiago/{request.node.name}/result_2.txt" - path_3 = f"thiago/{request.node.name}/result_3.txt" - path_4 = f"thiago/{request.node.name}/f1/result_1.txt" - path_5 = f"thiago/{request.node.name}/f1/result_2.txt" - path_6 = f"thiago/{request.node.name}/f1/result_3.txt" - all_paths = [path_1, path_2, path_3, path_4, path_5, path_6] - bucket_name = "archivetest" - for i, p in enumerate(all_paths): - data = f"Lorem ipsum on file {p} for {i * 'po'}" - storage.write_file(bucket_name, p, data) - results_1 = list( - storage.list_folder_contents(bucket_name, f"thiago/{request.node.name}") - ) - expected_result_1 = [ - {"name": path_1, "size": 84}, - {"name": path_2, "size": 86}, - {"name": path_3, "size": 87}, - {"name": path_4, "size": 88}, - {"name": path_5, "size": 89}, - {"name": path_6, "size": 90}, - ] - assert sorted(expected_result_1, key=lambda x: x["size"]) == sorted( - results_1, key=lambda x: x["size"] - ) - results_2 = list( - storage.list_folder_contents(bucket_name, f"thiago/{request.node.name}/f1") - ) - expected_result_2 = [ - {"name": path_4, "size": 88}, - {"name": path_5, "size": 89}, - {"name": path_6, "size": 90}, - ] - assert sorted(expected_result_2, key=lambda x: x["size"]) == sorted( - results_2, key=lambda x: x["size"] - ) - """ - Since we cannot rely on `Chain` in the underlying implementation - we cannot ''trick'' minio into using the IAM auth flow while testing, - and therefore have to actually be running on an AWS instance. - We can unskip this test after minio fixes their credential - chain problem - """ - - @pytest.mark.skip(reason="Skipping because minio IAM is currently untestable.") - def test_minio_with_iam_flow(self, codecov_vcr, mocker): - mocker.patch.dict( - os.environ, - { - "MINIO_ACCESS_KEY": "codecov-default-key", - "MINIO_SECRET_KEY": "codecov-default-secret", - }, - ) - minio_iam_config = { +BUCKET_NAME = "archivetest" + + +def make_storage() -> MinioStorageService: + return MinioStorageService( + { "access_key_id": "codecov-default-key", "secret_access_key": "codecov-default-secret", "verify_ssl": False, "host": "minio", "port": "9000", - "iam_auth": True, + "iam_auth": False, "iam_endpoint": None, } - bucket_name = "testminiowithiamflow" - storage = MinioStorageService(minio_iam_config) + ) + + +def ensure_bucket(storage: MinioStorageService): + try: + storage.create_root_storage(BUCKET_NAME) + except Exception: + pass + + +def test_create_bucket(): + storage = make_storage() + bucket_name = uuid4().hex + + res = storage.create_root_storage(bucket_name, region="") + assert res == {"name": bucket_name} + + +def test_create_bucket_already_exists(): + storage = make_storage() + bucket_name = uuid4().hex + + storage.create_root_storage(bucket_name) + with pytest.raises(BucketAlreadyExistsError): storage.create_root_storage(bucket_name) - path = "test_write_then_read_file/result" - data = "lorem ipsum dolor test_write_then_read_file á" - writing_result = storage.write_file(bucket_name, path, data) - assert writing_result - reading_result = storage.read_file(bucket_name, path) - assert reading_result.decode() == data - - def test_minio_without_ports(self, mocker): - mocked_minio_client = mocker.patch("shared.storage.minio.Minio") - minio_no_ports_config = { - "access_key_id": "hodor", - "secret_access_key": "haha", - "verify_ssl": False, - "host": "cute_url_no_ports", - "iam_auth": True, - "iam_endpoint": None, - } - storage = MinioStorageService(minio_no_ports_config) - assert storage.minio_config == minio_no_ports_config - mocked_minio_client.assert_called_with( - "cute_url_no_ports", credentials=mocker.ANY, secure=False, region=None - ) - def test_minio_with_ports(self, mocker): - mocked_minio_client = mocker.patch("shared.storage.minio.Minio") - minio_no_ports_config = { - "access_key_id": "hodor", - "secret_access_key": "haha", - "verify_ssl": False, - "host": "cute_url_no_ports", - "port": "9000", - "iam_auth": True, - "iam_endpoint": None, - } - storage = MinioStorageService(minio_no_ports_config) - assert storage.minio_config == minio_no_ports_config - mocked_minio_client.assert_called_with( - "cute_url_no_ports:9000", credentials=mocker.ANY, secure=False, region=None + +def test_write_then_read_file(): + storage = make_storage() + path = f"test_write_then_read_file/{uuid4().hex}" + data = "lorem ipsum dolor test_write_then_read_file á" + + ensure_bucket(storage) + writing_result = storage.write_file(BUCKET_NAME, path, data) + assert writing_result + reading_result = storage.read_file(BUCKET_NAME, path) + assert reading_result.decode() == data + + +def test_write_then_read_file_already_gzipped(): + storage = make_storage() + path = f"test_write_then_read_file_already_gzipped/{uuid4().hex}" + data = BytesIO( + gzip.compress("lorem ipsum dolor test_write_then_read_file á".encode()) + ) + + ensure_bucket(storage) + writing_result = storage.write_file( + BUCKET_NAME, path, data, is_already_gzipped=True + ) + assert writing_result + reading_result = storage.read_file(BUCKET_NAME, path) + assert reading_result.decode() == "lorem ipsum dolor test_write_then_read_file á" + + +def test_write_then_read_file_already_zstd(): + storage = make_storage() + path = f"test_write_then_read_file_already_zstd/{uuid4().hex}" + data = BytesIO( + zstandard.compress("lorem ipsum dolor test_write_then_read_file á".encode()) + ) + + ensure_bucket(storage) + writing_result = storage.write_file( + BUCKET_NAME, path, data, compression_type="zstd", is_compressed=True + ) + assert writing_result + reading_result = storage.read_file(BUCKET_NAME, path) + assert reading_result.decode() == "lorem ipsum dolor test_write_then_read_file á" + + +def test_write_then_read_file_obj(): + storage = make_storage() + path = f"test_write_then_read_file/{uuid4().hex}" + data = "lorem ipsum dolor test_write_then_read_file á" + + ensure_bucket(storage) + + _, local_path = tempfile.mkstemp() + with open(local_path, "w") as f: + f.write(data) + with open(local_path, "rb") as f: + writing_result = storage.write_file(BUCKET_NAME, path, f) + assert writing_result + + _, local_path = tempfile.mkstemp() + with open(local_path, "wb") as f: + storage.read_file(BUCKET_NAME, path, file_obj=f) + with open(local_path, "rb") as f: + assert f.read().decode() == data + + +def test_write_then_read_file_obj_already_gzipped(): + storage = make_storage() + path = f"test_write_then_read_file_obj_already_gzipped/{uuid4().hex}" + data = BytesIO( + gzip.compress("lorem ipsum dolor test_write_then_read_file á".encode()) + ) + + ensure_bucket(storage) + + _, local_path = tempfile.mkstemp() + with open(local_path, "wb") as f: + f.write(data.getvalue()) + with open(local_path, "rb") as f: + writing_result = storage.write_file( + BUCKET_NAME, path, f, is_already_gzipped=True ) + assert writing_result - def test_minio_with_region(self, mocker): - mocked_minio_client = mocker.patch("shared.storage.minio.Minio") - minio_no_ports_config = { - "access_key_id": "hodor", - "secret_access_key": "haha", - "verify_ssl": False, - "host": "cute_url_no_ports", - "port": "9000", - "iam_auth": True, - "iam_endpoint": None, - "region": "example", - } - storage = MinioStorageService(minio_no_ports_config) - assert storage.minio_config == minio_no_ports_config - mocked_minio_client.assert_called_with( - "cute_url_no_ports:9000", - credentials=mocker.ANY, - secure=False, - region="example", + _, local_path = tempfile.mkstemp() + with open(local_path, "wb") as f: + storage.read_file(BUCKET_NAME, path, file_obj=f) + with open(local_path, "rb") as f: + assert f.read().decode() == "lorem ipsum dolor test_write_then_read_file á" + + +def test_write_then_read_file_obj_already_zstd(): + storage = make_storage() + path = f"test_write_then_read_file_obj_already_zstd/{uuid4().hex}" + data = BytesIO( + zstandard.compress("lorem ipsum dolor test_write_then_read_file á".encode()) + ) + + ensure_bucket(storage) + + _, local_path = tempfile.mkstemp() + with open(local_path, "wb") as f: + f.write(data.getvalue()) + with open(local_path, "rb") as f: + writing_result = storage.write_file( + BUCKET_NAME, path, f, is_compressed=True, compression_type="zstd" ) + assert writing_result + + _, local_path = tempfile.mkstemp() + with open(local_path, "wb") as f: + storage.read_file(BUCKET_NAME, path, file_obj=f) + with open(local_path, "rb") as f: + assert f.read().decode() == "lorem ipsum dolor test_write_then_read_file á" + + +def test_read_file_does_not_exist(): + storage = make_storage() + path = f"test_read_file_does_not_exist/{uuid4().hex}" + + ensure_bucket(storage) + with pytest.raises(FileNotInStorageError): + storage.read_file(BUCKET_NAME, path) + + +def test_write_then_delete_file(): + storage = make_storage() + path = f"test_write_then_delete_file/{uuid4().hex}" + data = "lorem ipsum dolor test_write_then_delete_file á" + + ensure_bucket(storage) + writing_result = storage.write_file(BUCKET_NAME, path, data) + assert writing_result + + deletion_result = storage.delete_file(BUCKET_NAME, path) + assert deletion_result is True + with pytest.raises(FileNotInStorageError): + storage.read_file(BUCKET_NAME, path) + + +def test_batch_delete_files(): + storage = make_storage() + path = f"test_batch_delete_files/{uuid4().hex}" + path_1 = f"{path}/result_1.txt" + path_2 = f"{path}/result_2.txt" + path_3 = f"{path}/result_3.txt" + paths = [path_1, path_2, path_3] + data = "lorem ipsum dolor test_batch_delete_files á" + + ensure_bucket(storage) + storage.write_file(BUCKET_NAME, path_1, data) + storage.write_file(BUCKET_NAME, path_3, data) + + deletion_result = storage.delete_files(BUCKET_NAME, paths) + assert deletion_result == [True, True, True] + for p in paths: + with pytest.raises(FileNotInStorageError): + storage.read_file(BUCKET_NAME, p) + + +def test_list_folder_contents(): + storage = make_storage() + path = f"test_list_folder_contents/{uuid4().hex}" + path_1 = "/result_1.txt" + path_2 = "/result_2.txt" + path_3 = "/result_3.txt" + path_4 = "/x1/result_1.txt" + path_5 = "/x1/result_2.txt" + path_6 = "/x1/result_3.txt" + all_paths = [path_1, path_2, path_3, path_4, path_5, path_6] + + ensure_bucket(storage) + for i, p in enumerate(all_paths): + data = f"Lorem ipsum on file {p} for {i * 'po'}" + storage.write_file(BUCKET_NAME, f"{path}{p}", data) + + results_1 = sorted( + storage.list_folder_contents(BUCKET_NAME, path), + key=lambda x: x["name"], + ) + # NOTE: the `size` here is actually the compressed (currently gzip) size + assert results_1 == [ + {"name": f"{path}{path_1}", "size": 47}, + {"name": f"{path}{path_2}", "size": 49}, + {"name": f"{path}{path_3}", "size": 51}, + {"name": f"{path}{path_4}", "size": 56}, + {"name": f"{path}{path_5}", "size": 58}, + {"name": f"{path}{path_6}", "size": 60}, + ] + + results_2 = sorted( + storage.list_folder_contents(BUCKET_NAME, f"{path}/x1"), + key=lambda x: x["name"], + ) + assert results_2 == [ + {"name": f"{path}{path_4}", "size": 56}, + {"name": f"{path}{path_5}", "size": 58}, + {"name": f"{path}{path_6}", "size": 60}, + ] + + +def test_minio_without_ports(mocker): + mocked_minio_client = mocker.patch("shared.storage.minio.Minio") + minio_no_ports_config = { + "access_key_id": "hodor", + "secret_access_key": "haha", + "verify_ssl": False, + "host": "cute_url_no_ports", + "iam_auth": True, + "iam_endpoint": None, + } + + storage = MinioStorageService(minio_no_ports_config) + assert storage.minio_config == minio_no_ports_config + mocked_minio_client.assert_called_with( + "cute_url_no_ports", credentials=mocker.ANY, secure=False, region=None + ) + + +def test_minio_with_ports(mocker): + mocked_minio_client = mocker.patch("shared.storage.minio.Minio") + minio_no_ports_config = { + "access_key_id": "hodor", + "secret_access_key": "haha", + "verify_ssl": False, + "host": "cute_url_no_ports", + "port": "9000", + "iam_auth": True, + "iam_endpoint": None, + } + + storage = MinioStorageService(minio_no_ports_config) + assert storage.minio_config == minio_no_ports_config + mocked_minio_client.assert_called_with( + "cute_url_no_ports:9000", credentials=mocker.ANY, secure=False, region=None + ) + + +def test_minio_with_region(mocker): + mocked_minio_client = mocker.patch("shared.storage.minio.Minio") + minio_no_ports_config = { + "access_key_id": "hodor", + "secret_access_key": "haha", + "verify_ssl": False, + "host": "cute_url_no_ports", + "port": "9000", + "iam_auth": True, + "iam_endpoint": None, + "region": "example", + } + + storage = MinioStorageService(minio_no_ports_config) + assert storage.minio_config == minio_no_ports_config + mocked_minio_client.assert_called_with( + "cute_url_no_ports:9000", + credentials=mocker.ANY, + secure=False, + region="example", + ) diff --git a/uv.lock b/uv.lock index a9e0cbfb1..f94cccc8a 100644 --- a/uv.lock +++ b/uv.lock @@ -1430,6 +1430,7 @@ dependencies = [ { name = "requests" }, { name = "sentry-sdk" }, { name = "sqlalchemy" }, + { name = "zstandard" }, ] [package.dev-dependencies] @@ -1483,6 +1484,7 @@ requires-dist = [ { name = "requests", specifier = ">=2.32.3" }, { name = "sentry-sdk", specifier = ">=2.18.0" }, { name = "sqlalchemy", specifier = "<2" }, + { name = "zstandard", specifier = "==0.23.0" }, ] [package.metadata.requires-dev] @@ -1686,3 +1688,46 @@ sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e wheels = [ { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, ] + +[[package]] +name = "zstandard" +version = "0.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation == 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/f6/2ac0287b442160a89d726b17a9184a4c615bb5237db763791a7fd16d9df1/zstandard-0.23.0.tar.gz", hash = "sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09", size = 681701 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/83/f23338c963bd9de687d47bf32efe9fd30164e722ba27fb59df33e6b1719b/zstandard-0.23.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4567955a6bc1b20e9c31612e615af6b53733491aeaa19a6b3b37f3b65477094", size = 788713 }, + { url = "https://files.pythonhosted.org/packages/5b/b3/1a028f6750fd9227ee0b937a278a434ab7f7fdc3066c3173f64366fe2466/zstandard-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e172f57cd78c20f13a3415cc8dfe24bf388614324d25539146594c16d78fcc8", size = 633459 }, + { url = "https://files.pythonhosted.org/packages/26/af/36d89aae0c1f95a0a98e50711bc5d92c144939efc1f81a2fcd3e78d7f4c1/zstandard-0.23.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0e166f698c5a3e914947388c162be2583e0c638a4703fc6a543e23a88dea3c1", size = 4945707 }, + { url = "https://files.pythonhosted.org/packages/cd/2e/2051f5c772f4dfc0aae3741d5fc72c3dcfe3aaeb461cc231668a4db1ce14/zstandard-0.23.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12a289832e520c6bd4dcaad68e944b86da3bad0d339ef7989fb7e88f92e96072", size = 5306545 }, + { url = "https://files.pythonhosted.org/packages/0a/9e/a11c97b087f89cab030fa71206963090d2fecd8eb83e67bb8f3ffb84c024/zstandard-0.23.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d50d31bfedd53a928fed6707b15a8dbeef011bb6366297cc435accc888b27c20", size = 5337533 }, + { url = "https://files.pythonhosted.org/packages/fc/79/edeb217c57fe1bf16d890aa91a1c2c96b28c07b46afed54a5dcf310c3f6f/zstandard-0.23.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72c68dda124a1a138340fb62fa21b9bf4848437d9ca60bd35db36f2d3345f373", size = 5436510 }, + { url = "https://files.pythonhosted.org/packages/81/4f/c21383d97cb7a422ddf1ae824b53ce4b51063d0eeb2afa757eb40804a8ef/zstandard-0.23.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53dd9d5e3d29f95acd5de6802e909ada8d8d8cfa37a3ac64836f3bc4bc5512db", size = 4859973 }, + { url = "https://files.pythonhosted.org/packages/ab/15/08d22e87753304405ccac8be2493a495f529edd81d39a0870621462276ef/zstandard-0.23.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6a41c120c3dbc0d81a8e8adc73312d668cd34acd7725f036992b1b72d22c1772", size = 4936968 }, + { url = "https://files.pythonhosted.org/packages/eb/fa/f3670a597949fe7dcf38119a39f7da49a8a84a6f0b1a2e46b2f71a0ab83f/zstandard-0.23.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:40b33d93c6eddf02d2c19f5773196068d875c41ca25730e8288e9b672897c105", size = 5467179 }, + { url = "https://files.pythonhosted.org/packages/4e/a9/dad2ab22020211e380adc477a1dbf9f109b1f8d94c614944843e20dc2a99/zstandard-0.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9206649ec587e6b02bd124fb7799b86cddec350f6f6c14bc82a2b70183e708ba", size = 4848577 }, + { url = "https://files.pythonhosted.org/packages/08/03/dd28b4484b0770f1e23478413e01bee476ae8227bbc81561f9c329e12564/zstandard-0.23.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76e79bc28a65f467e0409098fa2c4376931fd3207fbeb6b956c7c476d53746dd", size = 4693899 }, + { url = "https://files.pythonhosted.org/packages/2b/64/3da7497eb635d025841e958bcd66a86117ae320c3b14b0ae86e9e8627518/zstandard-0.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:66b689c107857eceabf2cf3d3fc699c3c0fe8ccd18df2219d978c0283e4c508a", size = 5199964 }, + { url = "https://files.pythonhosted.org/packages/43/a4/d82decbab158a0e8a6ebb7fc98bc4d903266bce85b6e9aaedea1d288338c/zstandard-0.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9c236e635582742fee16603042553d276cca506e824fa2e6489db04039521e90", size = 5655398 }, + { url = "https://files.pythonhosted.org/packages/f2/61/ac78a1263bc83a5cf29e7458b77a568eda5a8f81980691bbc6eb6a0d45cc/zstandard-0.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8fffdbd9d1408006baaf02f1068d7dd1f016c6bcb7538682622c556e7b68e35", size = 5191313 }, + { url = "https://files.pythonhosted.org/packages/e7/54/967c478314e16af5baf849b6ee9d6ea724ae5b100eb506011f045d3d4e16/zstandard-0.23.0-cp312-cp312-win32.whl", hash = "sha256:dc1d33abb8a0d754ea4763bad944fd965d3d95b5baef6b121c0c9013eaf1907d", size = 430877 }, + { url = "https://files.pythonhosted.org/packages/75/37/872d74bd7739639c4553bf94c84af7d54d8211b626b352bc57f0fd8d1e3f/zstandard-0.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:64585e1dba664dc67c7cdabd56c1e5685233fbb1fc1966cfba2a340ec0dfff7b", size = 495595 }, + { url = "https://files.pythonhosted.org/packages/80/f1/8386f3f7c10261fe85fbc2c012fdb3d4db793b921c9abcc995d8da1b7a80/zstandard-0.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:576856e8594e6649aee06ddbfc738fec6a834f7c85bf7cadd1c53d4a58186ef9", size = 788975 }, + { url = "https://files.pythonhosted.org/packages/16/e8/cbf01077550b3e5dc86089035ff8f6fbbb312bc0983757c2d1117ebba242/zstandard-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38302b78a850ff82656beaddeb0bb989a0322a8bbb1bf1ab10c17506681d772a", size = 633448 }, + { url = "https://files.pythonhosted.org/packages/06/27/4a1b4c267c29a464a161aeb2589aff212b4db653a1d96bffe3598f3f0d22/zstandard-0.23.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2240ddc86b74966c34554c49d00eaafa8200a18d3a5b6ffbf7da63b11d74ee2", size = 4945269 }, + { url = "https://files.pythonhosted.org/packages/7c/64/d99261cc57afd9ae65b707e38045ed8269fbdae73544fd2e4a4d50d0ed83/zstandard-0.23.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ef230a8fd217a2015bc91b74f6b3b7d6522ba48be29ad4ea0ca3a3775bf7dd5", size = 5306228 }, + { url = "https://files.pythonhosted.org/packages/7a/cf/27b74c6f22541f0263016a0fd6369b1b7818941de639215c84e4e94b2a1c/zstandard-0.23.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:774d45b1fac1461f48698a9d4b5fa19a69d47ece02fa469825b442263f04021f", size = 5336891 }, + { url = "https://files.pythonhosted.org/packages/fa/18/89ac62eac46b69948bf35fcd90d37103f38722968e2981f752d69081ec4d/zstandard-0.23.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f77fa49079891a4aab203d0b1744acc85577ed16d767b52fc089d83faf8d8ed", size = 5436310 }, + { url = "https://files.pythonhosted.org/packages/a8/a8/5ca5328ee568a873f5118d5b5f70d1f36c6387716efe2e369010289a5738/zstandard-0.23.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac184f87ff521f4840e6ea0b10c0ec90c6b1dcd0bad2f1e4a9a1b4fa177982ea", size = 4859912 }, + { url = "https://files.pythonhosted.org/packages/ea/ca/3781059c95fd0868658b1cf0440edd832b942f84ae60685d0cfdb808bca1/zstandard-0.23.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c363b53e257246a954ebc7c488304b5592b9c53fbe74d03bc1c64dda153fb847", size = 4936946 }, + { url = "https://files.pythonhosted.org/packages/ce/11/41a58986f809532742c2b832c53b74ba0e0a5dae7e8ab4642bf5876f35de/zstandard-0.23.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e7792606d606c8df5277c32ccb58f29b9b8603bf83b48639b7aedf6df4fe8171", size = 5466994 }, + { url = "https://files.pythonhosted.org/packages/83/e3/97d84fe95edd38d7053af05159465d298c8b20cebe9ccb3d26783faa9094/zstandard-0.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a0817825b900fcd43ac5d05b8b3079937073d2b1ff9cf89427590718b70dd840", size = 4848681 }, + { url = "https://files.pythonhosted.org/packages/6e/99/cb1e63e931de15c88af26085e3f2d9af9ce53ccafac73b6e48418fd5a6e6/zstandard-0.23.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9da6bc32faac9a293ddfdcb9108d4b20416219461e4ec64dfea8383cac186690", size = 4694239 }, + { url = "https://files.pythonhosted.org/packages/ab/50/b1e703016eebbc6501fc92f34db7b1c68e54e567ef39e6e59cf5fb6f2ec0/zstandard-0.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fd7699e8fd9969f455ef2926221e0233f81a2542921471382e77a9e2f2b57f4b", size = 5200149 }, + { url = "https://files.pythonhosted.org/packages/aa/e0/932388630aaba70197c78bdb10cce2c91fae01a7e553b76ce85471aec690/zstandard-0.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d477ed829077cd945b01fc3115edd132c47e6540ddcd96ca169facff28173057", size = 5655392 }, + { url = "https://files.pythonhosted.org/packages/02/90/2633473864f67a15526324b007a9f96c96f56d5f32ef2a56cc12f9548723/zstandard-0.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ce8b52c5987b3e34d5674b0ab529a4602b632ebab0a93b07bfb4dfc8f8a33", size = 5191299 }, + { url = "https://files.pythonhosted.org/packages/b0/4c/315ca5c32da7e2dc3455f3b2caee5c8c2246074a61aac6ec3378a97b7136/zstandard-0.23.0-cp313-cp313-win32.whl", hash = "sha256:a9b07268d0c3ca5c170a385a0ab9fb7fdd9f5fd866be004c4ea39e44edce47dd", size = 430862 }, + { url = "https://files.pythonhosted.org/packages/a2/bf/c6aaba098e2d04781e8f4f7c0ba3c7aa73d00e4c436bcc0cf059a66691d1/zstandard-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:f3513916e8c645d0610815c257cbfd3242adfd5c4cfa78be514e5a3ebb42a41b", size = 495578 }, +]