Skip to content

Commit

Permalink
feat: use a dedicated endpoint to check if an external id is in a pro…
Browse files Browse the repository at this point in the history
…ject
  • Loading branch information
baptiste-olivier committed Oct 24, 2024
1 parent 33b921c commit 6f23abb
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 11 deletions.
7 changes: 7 additions & 0 deletions src/kili/adapters/kili_api_gateway/asset/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,10 @@ def get_assets_query(fragment: str) -> str:
urls: createUploadBucketSignedUrls(filePaths: $filePaths)
}
"""


# GraphQL query calling the `filterExistingAssets` resolver with a project ID
# and a list of external IDs. The resolver's result is aliased to
# `external_ids` in the response payload.
# NOTE(review): presumably the result is the subset of the given external IDs
# that already exist in the project -- confirm against the API schema.
GQL_FILTER_EXISTING_ASSETS = """
query FilterExistingAssets($projectID: ID!, $externalIDs: [String!]!) {
external_ids: filterExistingAssets(projectID: $projectID, externalIDs: $externalIDs)
}
"""
10 changes: 10 additions & 0 deletions src/kili/adapters/kili_api_gateway/asset/operations_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from kili.adapters.kili_api_gateway.asset.operations import (
GQL_COUNT_ASSETS,
GQL_CREATE_UPLOAD_BUCKET_SIGNED_URLS,
GQL_FILTER_EXISTING_ASSETS,
get_assets_query,
)
from kili.adapters.kili_api_gateway.base import BaseOperationMixin
Expand Down Expand Up @@ -125,3 +126,12 @@ def create_upload_bucket_signed_urls(self, file_paths: List[str]) -> List[str]:
}
urls_response = self.graphql_client.execute(GQL_CREATE_UPLOAD_BUCKET_SIGNED_URLS, payload)
return urls_response["urls"]

def filter_existing_assets(self, project_id: str, assets_external_ids: ListOrTuple[str]):
    """Run the FilterExistingAssets GraphQL query for a project.

    Args:
        project_id: Sent as the `projectID` query variable.
        assets_external_ids: Sent as the `externalIDs` query variable.

    Returns:
        The value stored under the `external_ids` key of the GraphQL response.
    """
    variables = dict(projectID=project_id, externalIDs=assets_external_ids)
    response = self.graphql_client.execute(GQL_FILTER_EXISTING_ASSETS, variables)
    return response["external_ids"]
22 changes: 11 additions & 11 deletions src/kili/services/asset_import/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
)
from kili.core.helpers import T, format_result, get_mime_type, is_url
from kili.core.utils.pagination import batcher
from kili.domain.asset import AssetFilters
from kili.domain.organization import OrganizationFilters
from kili.domain.project import InputType, ProjectId
from kili.domain.types import ListOrTuple
def filter_duplicate_external_ids(self, assets):
    """Filter out assets whose external_id is already in the project.

    Args:
        assets: Asset dictionaries to import; each one is matched on its
            "external_id" key.

    Returns:
        The assets whose external_id was not reported as already present in
        the project, in their original order.

    Raises:
        ImportValidationError: If `assets` is empty, or if no asset is left
            once the already-existing external ids have been filtered out.
    """
    if len(assets) == 0:
        raise ImportValidationError("No assets to import")

    # Assets without an external_id cannot be duplicates, and the query
    # variable is a list of non-null strings, so they are left out of the
    # request instead of being sent as None.
    assets_external_ids = [
        asset["external_id"] for asset in assets if asset.get("external_id") is not None
    ]
    # A set gives O(1) membership tests in the filtering pass below.
    external_ids_in_project = set(
        self.kili.kili_api_gateway.filter_existing_assets(
            self.project_params.project_id,
            assets_external_ids,
        )
    )
    filtered_assets = [
        asset for asset in assets if asset.get("external_id") not in external_ids_in_project
    ]

    # Decide from the filtered list rather than by comparing the size of the
    # API response with the size of the input: the two differ as soon as the
    # input repeats an external_id, which would let an empty import through.
    if len(filtered_assets) == 0:
        raise ImportValidationError(
            "No assets to import, all given external_ids already exist in the project"
        )
    nb_duplicate_assets = len(assets) - len(filtered_assets)
    if nb_duplicate_assets > 0:
        warnings.warn(
            f"{nb_duplicate_assets} assets were not imported because their external_id are"
            " already in the project",
            stacklevel=2,
        )
    return filtered_assets

def import_assets_by_batch(
Expand Down

0 comments on commit 6f23abb

Please sign in to comment.