Skip to content

Commit

Permalink
fix(lab-2558): import jp2 and ntf images asynchronously (#1619)
Browse files: browse the repository at this point in the history
Co-authored-by: Josselin BUILS <[email protected]>
  • Loading branch information
josselinbuils and Josselin BUILS authored Jan 9, 2024
1 parent 831867e commit 90bc7d6
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 17 deletions.
4 changes: 4 additions & 0 deletions src/kili/core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
mime_extensions_for_py_scripts = ["text/x-python"]
mime_extensions_for_txt_files = ["text/plain"]

# These mime types require post-processing either because they can contain geospatial metadata,
# or because they are not directly supported by the browser.
mime_extensions_that_need_post_processing = ["application/vnd.nitf", "image/jp2", "image/tiff"]

QUERY_BATCH_SIZE = 100
MUTATION_BATCH_SIZE = 100
MAX_CALLS_PER_MINUTE = 500
18 changes: 13 additions & 5 deletions src/kili/core/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,22 @@ def format_result(
return object_(formatted_json)


def get_data_type(path: str):
"""Get the data type, either image/png or application/pdf.
def get_mime_type(path: str):
    """Provide the mime type of a file (e.g. image/png).

    The type is derived from the file name only (case-insensitively); the file
    content is never read.

    Args:
        path: path of the file

    Returns:
        The mime type as a string, or None when it cannot be determined.
        Callers are expected to handle the None case explicitly.
    """
    path = path.lower()
    mime_type, _ = mimetypes.guess_type(path)

    # guess_type does not recognize JP2 files on Windows
    if mime_type is None and path.endswith(".jp2"):
        return "image/jp2"
    # guess_type provides a wrong mime type for NITF files on Ubuntu
    if path.endswith(".ntf") and mime_type != "application/vnd.nitf":
        return "application/vnd.nitf"
    return mime_type


def is_url(path: object):
Expand Down Expand Up @@ -278,7 +286,7 @@ def get_file_paths_to_upload(
def check_file_mime_type(path: str, input_type: str, raise_error=True) -> bool:
# pylint: disable=line-too-long
"""Returns true if the mime type of the file corresponds to the allowed mime types of the project."""
mime_type = get_data_type(path.lower())
mime_type = get_mime_type(path.lower())

if not (mime_extensions_for_IV2[input_type] and mime_type):
return False
Expand Down
4 changes: 2 additions & 2 deletions src/kili/services/asset_import/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
GQL_APPEND_MANY_ASSETS,
GQL_APPEND_MANY_FRAMES_TO_DATASET,
)
from kili.core.helpers import RetryLongWaitWarner, T, format_result, is_url
from kili.core.helpers import RetryLongWaitWarner, T, format_result, get_mime_type, is_url
from kili.core.utils.pagination import batcher
from kili.domain.asset import AssetFilters
from kili.domain.organization import OrganizationFilters
Expand Down Expand Up @@ -440,7 +440,7 @@ def check_mime_type_compatibility(self, path: str):
"""
if not os.path.isfile(path):
raise FileNotFoundError(f"file {path} does not exist")
mime_type, _ = mimetypes.guess_type(path)
mime_type = get_mime_type(path)
if mime_type is None:
raise MimeTypeError(f"The mime type of the asset {path} has not been found")

Expand Down
9 changes: 5 additions & 4 deletions src/kili/services/asset_import/image.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""Functions to import assets into an IMAGE project."""

import mimetypes
import os
from typing import List

from kili.core.constants import mime_extensions_that_need_post_processing
from kili.core.helpers import get_mime_type

from .base import BaseAbstractAssetImporter, BatchParams, ContentBatchImporter
from .constants import LARGE_IMAGE_THRESHOLD_SIZE
from .types import AssetLike
Expand Down Expand Up @@ -45,10 +47,9 @@ def split_asset_by_upload_type(assets: List[AssetLike], is_hosted: bool):
path = asset.get("content")
assert path
assert isinstance(path, str)
mime_type, _ = mimetypes.guess_type(path.lower())
mime_type = get_mime_type(path)
is_large_image = os.path.getsize(path) >= LARGE_IMAGE_THRESHOLD_SIZE
is_tiff = mime_type == "image/tiff"
if is_large_image or is_tiff:
if is_large_image or mime_type in mime_extensions_that_need_post_processing:
async_assets.append(asset)
else:
sync_assets.append(asset)
Expand Down
5 changes: 2 additions & 3 deletions src/kili/services/asset_import/video.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
"""Functions to import assets into a VIDEO_LEGACY project."""

import mimetypes
import os
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
from itertools import repeat
from typing import List

from kili.core.helpers import is_url
from kili.core.helpers import get_mime_type, is_url
from kili.services.asset_import.base import (
BaseAbstractAssetImporter,
BaseBatchImporter,
Expand Down Expand Up @@ -117,7 +116,7 @@ def upload_frames_to_bucket(self, asset: AssetLike):
for frame_path in frames:
with open(frame_path, "rb") as file:
data_array.append(file.read())
content_type, _ = mimetypes.guess_type(frame_path)
content_type = get_mime_type(frame_path)
content_type_array.append(content_type)
with ThreadPoolExecutor() as threads:
url_gen = threads.map(
Expand Down
6 changes: 3 additions & 3 deletions src/kili/services/plugins/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
GQL_UPDATE_WEBHOOK,
)
from kili.core.graphql.operations.plugin.queries import GQL_GET_PLUGIN_RUNNER_STATUS
from kili.core.helpers import get_data_type
from kili.core.helpers import get_mime_type
from kili.services.plugins.tools import check_errors_plugin_upload
from kili.utils import bucket
from kili.utils.tempfile import TemporaryDirectory
Expand All @@ -48,9 +48,9 @@ def check_file_mime_type(
) -> bool:
# pylint: disable=line-too-long
"""Returns true if the mime type of the file corresponds to one of compatible_mime_extensions."""
mime_type = get_data_type(path.as_posix())
mime_type = get_mime_type(path.as_posix())

if not (compatible_mime_extensions and mime_type):
if not compatible_mime_extensions or mime_type is None:
return False

correct_mime_type = mime_type in compatible_mime_extensions
Expand Down
30 changes: 30 additions & 0 deletions tests/unit/services/asset_import/test_import_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,36 @@ def test_upload_from_one_hosted_image(self, *_):
)
self.kili.graphql_client.execute.assert_called_with(*expected_parameters)

def test_upload_from_one_local_jp2_image(self, *_):
    # Importing a local JP2 file into an IMAGE project must end with the GraphQL
    # call built by `get_expected_async_call`.
    external_id = "local jp2 image"
    self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"}
    local_path = self.downloader(
        "https://storage.googleapis.com/label-public-staging/import-testing/test.jp2"
    )

    import_assets(
        self.kili,
        self.project_id,
        [{"content": local_path, "external_id": external_id}],
    )

    expected_call_args = self.get_expected_async_call(
        ["https://signed_url?id=id"],
        [external_id],
        ["unique_id"],
        ["{}"],
        "GEO_SATELLITE",
    )
    self.kili.graphql_client.execute.assert_called_with(*expected_call_args)

def test_upload_from_one_local_ntf_image(self, *_):
    # Importing a local NTF file into an IMAGE project must end with the GraphQL
    # call built by `get_expected_async_call`.
    external_id = "local ntf image"
    self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"}
    local_path = self.downloader(
        "https://storage.googleapis.com/label-public-staging/import-testing/test.ntf"
    )

    import_assets(
        self.kili,
        self.project_id,
        [{"content": local_path, "external_id": external_id}],
    )

    expected_call_args = self.get_expected_async_call(
        ["https://signed_url?id=id"],
        [external_id],
        ["unique_id"],
        ["{}"],
        "GEO_SATELLITE",
    )
    self.kili.graphql_client.execute.assert_called_with(*expected_call_args)

def test_upload_from_one_local_tiff_image(self, *_):
self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"}
url = "https://storage.googleapis.com/label-public-staging/geotiffs/bogota.tif"
Expand Down

0 comments on commit 90bc7d6

Please sign in to comment.