Skip to content

Commit

Permalink
fix: import with more than 100 external ids
Browse files Browse the repository at this point in the history
  • Loading branch information
baptiste-olivier committed Oct 24, 2024
1 parent 4ae723b commit a1a6cec
Showing 1 changed file with 17 additions and 6 deletions.
23 changes: 17 additions & 6 deletions src/kili/services/asset_import/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@
from kili.utils import bucket
from kili.utils.tqdm import tqdm

FILTER_EXISTING_BATCH_SIZE = 1000


if TYPE_CHECKING:
from kili.client import Kili

Expand Down Expand Up @@ -575,12 +578,20 @@ def filter_duplicate_external_ids(self, assets):
"""Filter out assets whose external_id is already in the project."""
if len(assets) == 0:
raise ImportValidationError("No assets to import")

assets_external_ids = [asset.get("external_id") for asset in assets]
external_ids_in_project = self.kili.kili_api_gateway.filter_existing_assets(
self.project_params.project_id,
assets_external_ids,
)
assets_external_ids = [
asset.get("external_id") for asset in assets if asset.get("external_id")
]
# split assets_external_ids into chunks of at most FILTER_EXISTING_BATCH_SIZE ids
assets_external_ids_chunks = [
assets_external_ids[x : x + FILTER_EXISTING_BATCH_SIZE]
for x in range(0, len(assets_external_ids), FILTER_EXISTING_BATCH_SIZE)
]
external_ids_in_project = []
for assets_external_ids_chunk in assets_external_ids_chunks:
external_ids_in_project += self.kili.kili_api_gateway.filter_existing_assets(
self.project_params.project_id,
assets_external_ids_chunk,
)

if len(external_ids_in_project) == len(assets):
raise ImportValidationError(
Expand Down

0 comments on commit a1a6cec

Please sign in to comment.