diff --git a/src/kili/core/constants.py b/src/kili/core/constants.py index 8e72ae029..2e4d2de31 100644 --- a/src/kili/core/constants.py +++ b/src/kili/core/constants.py @@ -32,6 +32,10 @@ mime_extensions_for_py_scripts = ["text/x-python"] mime_extensions_for_txt_files = ["text/plain"] +# These extensions require post-processing either because they can contain geospatial metadata, +# or because they are not directly supported by the browser. +mime_extensions_that_need_post_processing = ["application/vnd.nitf", "image/jp2", "image/tiff"] + QUERY_BATCH_SIZE = 100 MUTATION_BATCH_SIZE = 100 MAX_CALLS_PER_MINUTE = 500 diff --git a/src/kili/core/helpers.py b/src/kili/core/helpers.py index 08abd19fd..9bc6ad816 100644 --- a/src/kili/core/helpers.py +++ b/src/kili/core/helpers.py @@ -45,14 +45,22 @@ def format_result( return object_(formatted_json) -def get_data_type(path: str): - """Get the data type, either image/png or application/pdf. +def get_mime_type(path: str): + """Provide the mime type of a file (e.g. image/png). Args: path: path of the file """ - mime_type, _ = mimetypes.guess_type(path.lower()) - return mime_type if mime_type else "" + path = path.lower() + mime_type, _ = mimetypes.guess_type(path) + + # guess_type does not recognize JP2 files on Windows + if mime_type is None and path.endswith(".jp2"): + return "image/jp2" + # guess_type provides a wrong mime type for NITF files on Ubuntu + if path.endswith(".ntf") and mime_type != "application/vnd.nitf": + return "application/vnd.nitf" + return mime_type def is_url(path: object): @@ -278,7 +286,7 @@ def get_file_paths_to_upload( def check_file_mime_type(path: str, input_type: str, raise_error=True) -> bool: # pylint: disable=line-too-long """Returns true if the mime type of the file corresponds to the allowed mime types of the project.""" - mime_type = get_data_type(path.lower()) + mime_type = get_mime_type(path.lower()) if not (mime_extensions_for_IV2[input_type] and mime_type): return False diff --git a/src/kili/services/asset_import/base.py b/src/kili/services/asset_import/base.py index faaac006a..1d654f5e5 100644 --- a/src/kili/services/asset_import/base.py +++ b/src/kili/services/asset_import/base.py @@ -30,7 +30,7 @@ GQL_APPEND_MANY_ASSETS, GQL_APPEND_MANY_FRAMES_TO_DATASET, ) -from kili.core.helpers import RetryLongWaitWarner, T, format_result, is_url +from kili.core.helpers import RetryLongWaitWarner, T, format_result, get_mime_type, is_url from kili.core.utils.pagination import batcher from kili.domain.asset import AssetFilters from kili.domain.organization import OrganizationFilters @@ -440,7 +440,7 @@ def check_mime_type_compatibility(self, path: str): """ if not os.path.isfile(path): raise FileNotFoundError(f"file {path} does not exist") - mime_type, _ = mimetypes.guess_type(path) + mime_type = get_mime_type(path) if mime_type is None: raise MimeTypeError(f"The mime type of the asset {path} has not been found") diff --git a/src/kili/services/asset_import/image.py b/src/kili/services/asset_import/image.py index 690ddacea..b10079f7a 100644 --- a/src/kili/services/asset_import/image.py +++ b/src/kili/services/asset_import/image.py @@ -1,9 +1,11 @@ """Functions to import assets into an IMAGE project.""" -import mimetypes import os from typing import List +from kili.core.constants import mime_extensions_that_need_post_processing +from kili.core.helpers import get_mime_type + from .base import BaseAbstractAssetImporter, BatchParams, ContentBatchImporter from .constants import LARGE_IMAGE_THRESHOLD_SIZE from .types import AssetLike @@ -45,10 +47,9 @@ def split_asset_by_upload_type(assets: List[AssetLike], is_hosted: bool): path = asset.get("content") assert path assert isinstance(path, str) - mime_type, _ = mimetypes.guess_type(path.lower()) + mime_type = get_mime_type(path) is_large_image = os.path.getsize(path) >= LARGE_IMAGE_THRESHOLD_SIZE - is_tiff = mime_type == "image/tiff" - if is_large_image or is_tiff: + if is_large_image or mime_type in mime_extensions_that_need_post_processing: async_assets.append(asset) else: sync_assets.append(asset) diff --git a/src/kili/services/asset_import/video.py b/src/kili/services/asset_import/video.py index 7cd8e22fe..6c759be2a 100644 --- a/src/kili/services/asset_import/video.py +++ b/src/kili/services/asset_import/video.py @@ -1,13 +1,12 @@ """Functions to import assets into a VIDEO_LEGACY project.""" -import mimetypes import os from concurrent.futures import ThreadPoolExecutor from enum import Enum from itertools import repeat from typing import List -from kili.core.helpers import is_url +from kili.core.helpers import get_mime_type, is_url from kili.services.asset_import.base import ( BaseAbstractAssetImporter, BaseBatchImporter, @@ -117,7 +116,7 @@ def upload_frames_to_bucket(self, asset: AssetLike): for frame_path in frames: with open(frame_path, "rb") as file: data_array.append(file.read()) - content_type, _ = mimetypes.guess_type(frame_path) + content_type = get_mime_type(frame_path) content_type_array.append(content_type) with ThreadPoolExecutor() as threads: url_gen = threads.map( diff --git a/src/kili/services/plugins/upload.py b/src/kili/services/plugins/upload.py index 5b04173cc..5d56113fd 100644 --- a/src/kili/services/plugins/upload.py +++ b/src/kili/services/plugins/upload.py @@ -22,7 +22,7 @@ GQL_UPDATE_WEBHOOK, ) from kili.core.graphql.operations.plugin.queries import GQL_GET_PLUGIN_RUNNER_STATUS -from kili.core.helpers import get_data_type +from kili.core.helpers import get_mime_type from kili.services.plugins.tools import check_errors_plugin_upload from kili.utils import bucket from kili.utils.tempfile import TemporaryDirectory @@ -48,9 +48,9 @@ def check_file_mime_type( ) -> bool: # pylint: disable=line-too-long """Returns true if the mime type of the file corresponds to one of compatible_mime_extensions.""" - mime_type = get_data_type(path.as_posix()) + mime_type = get_mime_type(path.as_posix()) - if not (compatible_mime_extensions and mime_type): + if not compatible_mime_extensions or mime_type is None: return False correct_mime_type = mime_type in compatible_mime_extensions diff --git a/tests/unit/services/asset_import/test_import_image.py b/tests/unit/services/asset_import/test_import_image.py index da0c546cf..51aa13e04 100644 --- a/tests/unit/services/asset_import/test_import_image.py +++ b/tests/unit/services/asset_import/test_import_image.py @@ -44,6 +44,36 @@ def test_upload_from_one_hosted_image(self, *_): ) self.kili.graphql_client.execute.assert_called_with(*expected_parameters) + def test_upload_from_one_local_jp2_image(self, *_): + self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"} + url = "https://storage.googleapis.com/label-public-staging/import-testing/test.jp2" + path_image = self.downloader(url) + assets = [{"content": path_image, "external_id": "local jp2 image"}] + import_assets(self.kili, self.project_id, assets) + expected_parameters = self.get_expected_async_call( + ["https://signed_url?id=id"], + ["local jp2 image"], + ["unique_id"], + ["{}"], + "GEO_SATELLITE", + ) + self.kili.graphql_client.execute.assert_called_with(*expected_parameters) + + def test_upload_from_one_local_ntf_image(self, *_): + self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"} + url = "https://storage.googleapis.com/label-public-staging/import-testing/test.ntf" + path_image = self.downloader(url) + assets = [{"content": path_image, "external_id": "local ntf image"}] + import_assets(self.kili, self.project_id, assets) + expected_parameters = self.get_expected_async_call( + ["https://signed_url?id=id"], + ["local ntf image"], + ["unique_id"], + ["{}"], + "GEO_SATELLITE", + ) + self.kili.graphql_client.execute.assert_called_with(*expected_parameters) + def test_upload_from_one_local_tiff_image(self, *_): self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"} url = "https://storage.googleapis.com/label-public-staging/geotiffs/bogota.tif"