From 4261aa61bfef130d816f3bdb713b7786fec62548 Mon Sep 17 00:00:00 2001 From: "@fanny.gaudin" Date: Wed, 13 Nov 2024 14:10:40 +0100 Subject: [PATCH 1/3] fix(LAB-3244): remove order on already sorted chat_items --- src/kili/llm/services/export/dynamic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/kili/llm/services/export/dynamic.py b/src/kili/llm/services/export/dynamic.py index b0ddedf2c..0b39037eb 100644 --- a/src/kili/llm/services/export/dynamic.py +++ b/src/kili/llm/services/export/dynamic.py @@ -123,10 +123,9 @@ def _init_round(self, context): def _build_rounds(self, chat_items, annotations, json_interface): """A round is composed of a prompt with n pre-prompts and n completions.""" - ordered_chat_items = sorted(chat_items, key=lambda x: x["createdAt"]) rounds = [] current_round = self._init_round([]) - for chat_item in ordered_chat_items: + for chat_item in chat_items: role = chat_item["role"].lower() if chat_item["role"] else None if role == "user" or role == "system": if current_round["prompt"] is not None: From a4abd8fd2ae5759fade84d48b4aeb4e10c8a5756 Mon Sep 17 00:00:00 2001 From: "@baptiste33" Date: Thu, 14 Nov 2024 17:09:59 +0100 Subject: [PATCH 2/3] fix: export dynamic is backend order agnostic --- src/kili/llm/services/export/dynamic.py | 98 ++++++++++++------- .../unit/llm/services/export/test_dynamic.py | 24 ++--- 2 files changed, 76 insertions(+), 46 deletions(-) diff --git a/src/kili/llm/services/export/dynamic.py b/src/kili/llm/services/export/dynamic.py index 0b39037eb..7a2e2b7fe 100644 --- a/src/kili/llm/services/export/dynamic.py +++ b/src/kili/llm/services/export/dynamic.py @@ -8,7 +8,6 @@ CHAT_ITEMS_NEEDED_FIELDS = [ "id", "content", - "createdAt", "modelId", "parentId", "role", @@ -123,41 +122,72 @@ def _init_round(self, context): def _build_rounds(self, chat_items, annotations, json_interface): """A round is composed of a prompt with n pre-prompts and n completions.""" + dict_chat_items = {} + for chat_item in chat_items: + if dict_chat_items.get(chat_item["parentId"]) is None: + dict_chat_items[chat_item["parentId"]] = [] + dict_chat_items[chat_item["parentId"]].append(chat_item) rounds = [] + parent_target = None + has_children = True current_round = self._init_round([]) - for chat_item in chat_items: - role = chat_item["role"].lower() if chat_item["role"] else None - if role == "user" or role == "system": - if current_round["prompt"] is not None: - rounds.append(current_round) - new_context = ( - current_round["context"] - + current_round["pre_prompts"] - + [ - current_round["prompt"], - self._get_round_winner( - current_round["completion"], - current_round["annotations"], - json_interface, - ), - ] - ) - current_round = self._init_round(new_context) - - if role == "user": - current_round["prompt"] = chat_item - elif role == "system": - current_round["pre_prompts"].append(chat_item) - elif role == "assistant": - current_round["completion"].append(chat_item) - else: - raise ValueError(f"Role {chat_item['role']} not supported") - current_round["annotations"] += [ - annotation - for annotation in annotations - if annotation["chatItemId"] == chat_item["id"] - ] - rounds.append(current_round) + + while has_children: + node = dict_chat_items[parent_target][0] + if node["role"].lower() == "system": + current_round["pre_prompts"].append(node) + parent_target = node["id"] + current_round["annotations"] += [ + annotation + for annotation in annotations + if annotation["chatItemId"] == node["id"] + ] + continue + + if node["role"].lower() == "user": + current_round["prompt"] = node + parent_target = node["id"] + current_round["annotations"] += [ + annotation + for annotation in annotations + if annotation["chatItemId"] == node["id"] + ] + continue + + if node["role"].lower() == "assistant": + has_children = False + if dict_chat_items.get(parent_target) is None: + continue + for chat_item in dict_chat_items[parent_target]: + current_round["completion"].append(chat_item) + current_round["annotations"] += [ + annotation + for annotation in annotations + if annotation["chatItemId"] == chat_item["id"] + ] + if not has_children and dict_chat_items.get(chat_item["id"]) is not None: + has_children = True + parent_target = chat_item["id"] + + rounds.append(current_round) + new_context = ( + current_round["context"] + + current_round["pre_prompts"] + + [ + current_round["prompt"], + self._get_round_winner( + current_round["completion"], + current_round["annotations"], + json_interface, + ), + ] + ) + current_round = self._init_round(new_context) + continue + + raise ValueError(f"Role {node['role']} not supported") + if current_round["prompt"] is not None: + rounds.append(current_round) return rounds diff --git a/tests/unit/llm/services/export/test_dynamic.py b/tests/unit/llm/services/export/test_dynamic.py index fff2e6ab2..01b8628b9 100644 --- a/tests/unit/llm/services/export/test_dynamic.py +++ b/tests/unit/llm/services/export/test_dynamic.py @@ -111,12 +111,12 @@ }, "chatItems": [ { - "id": "cm2u6kgcc001aj7ja1stsbrvu", - "content": "You are a helpful assistant", - "createdAt": "2024-08-06T12:28:52.170Z", - "modelId": None, - "parentId": None, - "role": "SYSTEM", + "id": "clziefeoe003m7tc976xwbh58", + "content": "Turtles are reptiles known for their protective shells, which act as both home and armor. They are slow-moving on land but can be agile in water, with many species being excellent swimmers. Turtles are omnivorous, feeding on plants, insects, and small animals. They are cold-blooded and rely on external heat sources to regulate their body temperature. Turtles are long-lived, with some species living over 100 years. They are found on every continent except Antarctica, inhabiting a variety of environments including oceans, freshwater lakes, and even deserts. Many turtle species are endangered due to habitat loss and other factors.", + "createdAt": "2024-08-06T12:30:52.430Z", + "modelId": "clzief6pr003c7tc99680e8yj", + "parentId": "clziefeh6003k7tc99abderkk", + "role": "ASSISTANT", }, { "id": "clziefeh6003k7tc99abderkk", @@ -127,12 +127,12 @@ "role": "USER", }, { - "id": "clziefeoe003m7tc976xwbh58", - "content": "Turtles are reptiles known for their protective shells, which act as both home and armor. They are slow-moving on land but can be agile in water, with many species being excellent swimmers. Turtles are omnivorous, feeding on plants, insects, and small animals. They are cold-blooded and rely on external heat sources to regulate their body temperature. Turtles are long-lived, with some species living over 100 years. They are found on every continent except Antarctica, inhabiting a variety of environments including oceans, freshwater lakes, and even deserts. Many turtle species are endangered due to habitat loss and other factors.", - "createdAt": "2024-08-06T12:30:52.430Z", - "modelId": "clzief6pr003c7tc99680e8yj", - "parentId": "clziefeh6003k7tc99abderkk", - "role": "ASSISTANT", + "id": "cm2u6kgcc001aj7ja1stsbrvu", + "content": "You are a helpful assistant", + "createdAt": "2024-08-06T12:28:52.170Z", + "modelId": None, + "parentId": None, + "role": "SYSTEM", }, { "id": "clziefepk003n7tc9fyx49vei", From ac0baa6063646569a61cbf5c3d99d9a4c852c228 Mon Sep 17 00:00:00 2001 From: "@baptiste33" Date: Fri, 15 Nov 2024 11:28:40 +0100 Subject: [PATCH 3/3] fix: export dynamic without completions --- src/kili/llm/services/export/dynamic.py | 6 +- .../unit/llm/services/export/test_dynamic.py | 131 ++++++++++++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) diff --git a/src/kili/llm/services/export/dynamic.py b/src/kili/llm/services/export/dynamic.py index 7a2e2b7fe..867023d91 100644 --- a/src/kili/llm/services/export/dynamic.py +++ b/src/kili/llm/services/export/dynamic.py @@ -133,7 +133,11 @@ def _build_rounds(self, chat_items, annotations, json_interface): current_round = self._init_round([]) while has_children: - node = dict_chat_items[parent_target][0] + nodes = dict_chat_items.get(parent_target) + if nodes is None or len(nodes) == 0: + has_children = False + continue + node = nodes[0] if node["role"].lower() == "system": current_round["pre_prompts"].append(node) parent_target = node["id"] diff --git a/tests/unit/llm/services/export/test_dynamic.py b/tests/unit/llm/services/export/test_dynamic.py index 01b8628b9..9725cea2a 100644 --- a/tests/unit/llm/services/export/test_dynamic.py +++ b/tests/unit/llm/services/export/test_dynamic.py @@ -449,6 +449,115 @@ } ] +mock_fetch_assets_no_completions = [ + { + "labels": [ + { + "annotations": [], + "author": { + "id": "user-1", + "email": "test+admin@kili-technology.com", + "firstname": "Test", + "lastname": "Admin", + }, + "chatItems": [ + { + "id": "clziefeh6003k7tc99abderkk", + "content": "describe turtle in 100 words", + "createdAt": "2024-08-06T12:30:52.170Z", + "modelId": None, + "parentId": "cm2u6kgcc001aj7ja1stsbrvu", + "role": "USER", + }, + { + "id": "cm2u6kgcc001aj7ja1stsbrvu", + "content": "You are a helpful assistant", + "createdAt": "2024-08-06T12:28:52.170Z", + "modelId": None, + "parentId": None, + "role": "SYSTEM", + }, + ], + "createdAt": "2024-08-06T12:30:42.122Z", + "isLatestLabelForUser": True, + "isSentBackToQueue": False, + "id": "clzief6q2003e7tc91jm46uii", + "jsonResponse": {}, + "labelType": "AUTOSAVE", + "modelName": None, + } + ], + "content": "", + "assetProjectModels": [ + { + "id": "clzief6pr003c7tc99680e8yj", + "projectModelId": "ProjectModelA", + "configuration": {"temperature": 1, "model": "my-model-1"}, + }, + { + "id": "clzief6ps003d7tc9fzgj2xkf", + "projectModelId": "ProjectModelB", + "configuration": {"temperature": 0.5, "model": "my-model-2"}, + }, + ], + "externalId": "clzief6pg003a7tc9cn0p1obf", + "jsonMetadata": {}, + "status": "ONGOING", + } +] + +expected_export_ongoing = [ + { + "0": { + "raw_data": [ + { + "id": "cm2u6kgcc001aj7ja1stsbrvu", + "role": "system", + "chat_id": "clzief6q2003e7tc91jm46uii", + "content": "You are a helpful assistant", + "model": None, + }, + { + "content": "describe turtle in 100 words", + "role": "user", + "chat_id": "clzief6q2003e7tc91jm46uii", + "id": "clziefeh6003k7tc99abderkk", + "model": None, + }, + ], + "status": "ONGOING", + "external_id": "clzief6pg003a7tc9cn0p1obf", + "metadata": {}, + "models": { + "A": { + "configuration": { + "model": "my-model-1", + "temperature": 1, + }, + "id": "clzief6pr003c7tc99680e8yj", + "projectModelId": "ProjectModelA", + }, + "B": { + "configuration": { + "model": "my-model-2", + "temperature": 0.5, + }, + "id": "clzief6ps003d7tc9fzgj2xkf", + "projectModelId": "ProjectModelB", + }, + }, + "labels": [ + { + "author": "test+admin@kili-technology.com", + "created_at": "2024-08-06T12:30:42.122Z", + "label_type": "AUTOSAVE", + "label": {}, + } + ], + }, + } +] + def test_export_dynamic(mocker): get_project_return_val = { @@ -471,6 +580,28 @@ def test_export_dynamic(mocker): assert result == expected_export +def test_export_dynamic_ongoing(mocker): + get_project_return_val = { + "jsonInterface": mock_json_interface, + "inputType": "LLM_INSTR_FOLLOWING", + "title": "Test project", + "id": "project_id", + "dataConnections": None, + } + kili_api_gateway = mocker.MagicMock() + kili_api_gateway.count_assets.return_value = 3 + kili_api_gateway.get_project.return_value = get_project_return_val + kili_api_gateway.list_assets.return_value = mock_fetch_assets_no_completions + + kili_llm = LlmClientMethods(kili_api_gateway) + + result = kili_llm.export( + project_id="project_id", + label_type_in=["DEFAULT", "REVIEW", "AUTOSAVE"], + ) + assert result == expected_export_ongoing + + def test_export_dynamic_empty_json_interface(mocker): get_project_return_val = { "jsonInterface": mock_empty_json_interface,