From 59854fc87a7bfb8cd74d5161efc485d1a5292a99 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 31 Jul 2024 10:31:11 -0400 Subject: [PATCH 1/9] Fix table and column names #10 --- ...002_etl_collections_to_controlled_lists.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 2889837..8ba7021 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -77,30 +77,30 @@ class Migration(migrations.Migration): -- If overwrite flag is provided, completely recreate the list/items/values if overwrite then - delete from controlled_list_item_values - where itemid in ( + delete from arches_references_listitemvalue + where list_item_id in ( select id - from controlled_list_items - where listid in ( + from arches_references_listitem + where list_id in ( select id - from controlled_lists + from arches_references_list where name = any(collection_names) ) ); - delete from controlled_list_items - where listid in ( + delete from arches_references_listitem + where list_id in ( select id - from controlled_lists + from arches_references_list where name = any(collection_names) ); - delete from controlled_lists + delete from arches_references_list where name = any(collection_names); end if; -- Migrate Collection -> Controlled List - insert into controlled_lists ( + insert into arches_references_list ( id, name, dynamic, @@ -171,43 +171,43 @@ class Migration(migrations.Migration): alpha_sorted_list_item_hierarchy as ( select child as id, row_number() over (partition by root_list order by depth, LOWER(value)) - 1 as sortorder, - root_list as listid, + root_list as list_id, case when conceptidfrom = root_list then null -- list items at top of hierarchy have no parent list item else conceptidfrom end as parent_id, depth from ranked_prefLabels rpl where language_rank = 1 and - root_list in (select id from controlled_lists where name = ANY(collection_names)) + root_list in (select id from arches_references_list where name = ANY(collection_names)) ) - insert into controlled_list_items( + insert into arches_references_listitem( id, uri, sortorder, guide, - listid, + list_id, parent_id ) select id, host || id as uri, sortorder, false as guide, - listid, + list_id, parent_id from alpha_sorted_list_item_hierarchy; -- Migrate concept values -> controlled list item values - insert into controlled_list_item_values ( + insert into arches_references_listitemvalue ( id, value, - itemid, + list_item_id, languageid, valuetype_id ) select distinct (v.valueid) id, value, - r.conceptidto as itemid, + r.conceptidto as list_item_id, languageid, valuetype as valuetype_id from relations r @@ -215,8 +215,8 @@ class Migration(migrations.Migration): where relationtype = 'member' and (valuetype = 'prefLabel' or valuetype = 'altLabel') and r.conceptidto in ( - select id from controlled_list_items where listid in ( - select id from controlled_lists where name = ANY(collection_names) + select id from arches_references_listitem where list_id in ( + select id from arches_references_list where name = ANY(collection_names) ) ); From 6b1426992f3f1e445a0383003e9c622709982a39 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 31 Jul 2024 10:31:58 -0400 Subject: [PATCH 2/9] Add rdm to clm CLI migration path --- .../management/commands/controlled_lists.py | 120 ++++++++++++++++++ tests/cli_tests.py | 43 ++++++- tests/data/concept_label_test_collection.xml | 28 ++++ 3 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 arches_references/management/commands/controlled_lists.py create mode 100644 tests/data/concept_label_test_collection.xml diff --git a/arches_references/management/commands/controlled_lists.py b/arches_references/management/commands/controlled_lists.py new file mode 100644 index 0000000..d810229 --- /dev/null +++ b/arches_references/management/commands/controlled_lists.py @@ -0,0 +1,120 @@ +from arches.app.models.models import Concept, Value +from django.core.management.base import BaseCommand + + +class Command(BaseCommand): + """ + Commands for running controlled list operations + + """ + + def add_arguments(self, parser): + parser.add_argument( + "-o", + "--operation", + action="store", + dest="operation", + required=True, + choices=["migrate_collections_to_controlled_lists"], + help="The operation to perform", + ) + + parser.add_argument( + "-co", + "--collections", + action="store", + dest="collections_to_migrate", + nargs="*", + required=True, + help="One or more collections to migrate to controlled lists", + ) + + parser.add_argument( + "-ho", + "--host", + action="store", + dest="host", + default="http://localhost:8000/plugins/controlled-list-manager/item/", + help="Provide a host for URI generation. Default is localhost", + ) + + parser.add_argument( + "-ow", + "--overwrite", + action="store_true", + dest="overwrite", + default=False, + help="Overwrite the entire controlled list and its list items/values. Default false.", + ) + + parser.add_argument( + "-psl", + "--preferred_sort_language", + action="store", + dest="preferred_sort_language", + default="en", + help="The language to use for sorting preferred labels. Default 'en'", + ) + + def handle(self, *args, **options): + if options["operation"] == "migrate_collections_to_controlled_lists": + self.migrate_collections_to_controlled_lists( + collections_to_migrate=options["collections_to_migrate"], + host=options["host"], + overwrite=options["overwrite"], + preferred_sort_language=options["preferred_sort_language"], + ) + + def migrate_collections_to_controlled_lists( + self, + collections_to_migrate, + host, + overwrite, + preferred_sort_language, + ): + """ + Uses a postgres function to migrate collections to controlled lists + + Example usage: + python manage.py controlled_lists + -o migrate_collections_to_controlled_lists + -co 'Johns list' 'Getty AAT' + -ho 'http://localhost:8000/plugins/controlled-list-manager/item/' + -psl 'fr' + -ow + """ + + collections_in_db = list( + Value.objects.filter( + value__in=collections_to_migrate, + valuetype__in=["prefLabel", "identifier"], + concept__nodetype="Collection", + ).values_list("value", flat=True) + ) + + failed_collections = [ + collection + for collection in collections_to_migrate + if collection not in collections_in_db + ] + + if len(failed_collections) > 0: + self.stdout.write( + "Failed to find the following collections in the database: %s" + % ", ".join(failed_collections) + ) + + if len(collections_in_db) > 0: + from django.db import connection + + cursor = connection.cursor() + cursor.execute( + """ + select * from __arches_migrate_collections_to_clm( + ARRAY[%s], %s, %s::boolean, %s + ); + """, + [collections_in_db, host, overwrite, preferred_sort_language], + ) + result = cursor.fetchone() + self.stdout.write(result[0]) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 97d1735..0665673 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -3,10 +3,13 @@ from django.conf import settings from django.core import management +from django.urls import reverse from django.test import TestCase +from django.test.client import Client from django.test.utils import captured_stdout -from arches_references.models import List +from arches_references.models import List, ListItem, ListItemValue +from arches.app.utils.skos import SKOSReader from .test_settings import PROJECT_TEST_ROOT @@ -57,3 +60,41 @@ def test_import_controlled_list(self): self.assertTrue(List.objects.filter(pk=list_pk).exists()) ### TODO Add test for creating new language if language code not in db but found in import file + + +class RDMToControlledListsETLTests(TestCase): + + @classmethod + def setUpTestData(cls): + + skos = SKOSReader() + rdf = skos.read_file( + os.path.join(PROJECT_TEST_ROOT, "data/concept_label_test_collection.xml") + ) + ret = skos.save_concepts_from_skos(rdf) + + client = Client() + client.login(username="admin", password="admin") + response = client.get( + reverse( + "make_collection", + kwargs={"conceptid": "7c90899a-dbe9-4574-9175-e69481a80b3c"}, + ) + ) + + def test_migrate_collections_to_controlled_lists(self): + output = io.StringIO() + management.call_command( + "controlled_lists", + operation="migrate_collections_to_controlled_lists", + collections_to_migrate=["Concept Label Import Test"], + host="http://localhost:8000/plugins/controlled-list-manager/item/", + preferred_sort_language="en", + overwrite=False, + stdout=output, + ) + + self.assertTrue(List.objects.filter(name="Concept Label Import Test").exists()) + self.assertTrue( + ListItem.objects.filter(id="89ff530a-f350-44f0-ac88-bdd8904eb57e").exists() + ) diff --git a/tests/data/concept_label_test_collection.xml b/tests/data/concept_label_test_collection.xml new file mode 100644 index 0000000..e06ed1b --- /dev/null +++ b/tests/data/concept_label_test_collection.xml @@ -0,0 +1,28 @@ + + + + + + + {"id": "7949d7b5-6e57-469a-8f38-87aac08e1788", "value": "Test Concept 2"} + + + + + {"id": "fad6f17d-f7c8-4fa1-b358-e8626571599e", "value": "Test Concept 3"} + + + + + + + {"id": "9fa56006-6828-480f-8395-ad5c5a84726b", "value": "Test Concept 1"} + + + {"id": "f5e1a756-c658-4a3c-bc3a-e9293242e8f7", "value": "Concept Label Import Test"} + + From 0531901c5cf20cbdae21e637a55c1db3b263df1d Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Mon, 12 Aug 2024 09:32:44 -0400 Subject: [PATCH 3/9] Un-americanize test data --- tests/data/concept_label_test_collection.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/data/concept_label_test_collection.xml b/tests/data/concept_label_test_collection.xml index e06ed1b..a8d838f 100644 --- a/tests/data/concept_label_test_collection.xml +++ b/tests/data/concept_label_test_collection.xml @@ -8,21 +8,21 @@ - {"id": "7949d7b5-6e57-469a-8f38-87aac08e1788", "value": "Test Concept 2"} + {"id": "7949d7b5-6e57-469a-8f38-87aac08e1788", "value": "Test Concept 2"} - {"id": "fad6f17d-f7c8-4fa1-b358-e8626571599e", "value": "Test Concept 3"} + {"id": "fad6f17d-f7c8-4fa1-b358-e8626571599e", "value": "Test Concept 3"} - {"id": "9fa56006-6828-480f-8395-ad5c5a84726b", "value": "Test Concept 1"} + {"id": "9fa56006-6828-480f-8395-ad5c5a84726b", "value": "Test Concept 1"} - {"id": "f5e1a756-c658-4a3c-bc3a-e9293242e8f7", "value": "Concept Label Import Test"} + {"id": "f5e1a756-c658-4a3c-bc3a-e9293242e8f7", "value": "Concept Label Import Test"} From 6b58d0f50c20384d255c9c44a5aabec90ca53993 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Mon, 12 Aug 2024 09:33:21 -0400 Subject: [PATCH 4/9] Remove unused imports --- tests/cli_tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 0665673..15a6a48 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -1,14 +1,13 @@ import io import os -from django.conf import settings from django.core import management from django.urls import reverse from django.test import TestCase from django.test.client import Client from django.test.utils import captured_stdout -from arches_references.models import List, ListItem, ListItemValue +from arches_references.models import List, ListItem from arches.app.utils.skos import SKOSReader from .test_settings import PROJECT_TEST_ROOT From 51b40b1ee920b21eb809e9673d9b8429b0774d55 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Mon, 12 Aug 2024 09:34:06 -0400 Subject: [PATCH 5/9] Separate args to path.join --- tests/cli_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 15a6a48..00e3c48 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -45,7 +45,7 @@ def test_export_controlled_list(self): class ListImportPackageTests(TestCase): def test_import_controlled_list(self): - input_file = os.path.join(PROJECT_TEST_ROOT, "data/controlled_lists.xlsx") + input_file = os.path.join(PROJECT_TEST_ROOT, "data", "controlled_lists.xlsx") output = io.StringIO() # packages command does not yet fully avoid print() with captured_stdout(): @@ -68,7 +68,7 @@ def setUpTestData(cls): skos = SKOSReader() rdf = skos.read_file( - os.path.join(PROJECT_TEST_ROOT, "data/concept_label_test_collection.xml") + os.path.join(PROJECT_TEST_ROOT, "data", "concept_label_test_collection.xml") ) ret = skos.save_concepts_from_skos(rdf) From 79e7b5df642d3c85496d0db3b0261318b7a13608 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Mon, 12 Aug 2024 12:43:02 -0400 Subject: [PATCH 6/9] Assert against total list items, rather than uuid --- tests/cli_tests.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 00e3c48..293b199 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -93,7 +93,6 @@ def test_migrate_collections_to_controlled_lists(self): stdout=output, ) - self.assertTrue(List.objects.filter(name="Concept Label Import Test").exists()) - self.assertTrue( - ListItem.objects.filter(id="89ff530a-f350-44f0-ac88-bdd8904eb57e").exists() - ) + imported_list = List.objects.get(name="Concept Label Import Test") + imported_items = imported_list.list_items.all() + self.assertEqual(len(imported_items), 3) From 667df5b332a743f3d386199e45c8f1cf56076e06 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Mon, 12 Aug 2024 12:43:54 -0400 Subject: [PATCH 7/9] Add test for no matching collection --- .../management/commands/controlled_lists.py | 4 ++-- tests/cli_tests.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arches_references/management/commands/controlled_lists.py b/arches_references/management/commands/controlled_lists.py index d810229..e04530f 100644 --- a/arches_references/management/commands/controlled_lists.py +++ b/arches_references/management/commands/controlled_lists.py @@ -1,4 +1,4 @@ -from arches.app.models.models import Concept, Value +from arches.app.models.models import Value from django.core.management.base import BaseCommand @@ -99,7 +99,7 @@ def migrate_collections_to_controlled_lists( ] if len(failed_collections) > 0: - self.stdout.write( + self.stderr.write( "Failed to find the following collections in the database: %s" % ", ".join(failed_collections) ) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 293b199..8feb761 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -96,3 +96,18 @@ def test_migrate_collections_to_controlled_lists(self): imported_list = List.objects.get(name="Concept Label Import Test") imported_items = imported_list.list_items.all() self.assertEqual(len(imported_items), 3) + + def test_no_matching_collection_error(self): + expected_output = "Failed to find the following collections in the database: Collection That Doesn't Exist" + with captured_stdout() as output: + management.call_command( + "controlled_lists", + operation="migrate_collections_to_controlled_lists", + collections_to_migrate=["Collection That Doesn't Exist"], + host="http://localhost:8000/plugins/controlled-list-manager/item/", + preferred_sort_language="en", + overwrite=False, + stdout=output, + stderr=output, + ) + self.assertIn(expected_output, output.getvalue().strip()) From c1e02eb6431b7c08ec51b3c06a51a460e7b395cb Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 12 Aug 2024 12:53:45 -0400 Subject: [PATCH 8/9] Avoid piping both streams to same output --- tests/cli_tests.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 8feb761..d050a2b 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -99,15 +99,14 @@ def test_migrate_collections_to_controlled_lists(self): def test_no_matching_collection_error(self): expected_output = "Failed to find the following collections in the database: Collection That Doesn't Exist" - with captured_stdout() as output: - management.call_command( - "controlled_lists", - operation="migrate_collections_to_controlled_lists", - collections_to_migrate=["Collection That Doesn't Exist"], - host="http://localhost:8000/plugins/controlled-list-manager/item/", - preferred_sort_language="en", - overwrite=False, - stdout=output, - stderr=output, - ) + output = io.StringIO() + management.call_command( + "controlled_lists", + operation="migrate_collections_to_controlled_lists", + collections_to_migrate=["Collection That Doesn't Exist"], + host="http://localhost:8000/plugins/controlled-list-manager/item/", + preferred_sort_language="en", + overwrite=False, + stderr=output, + ) self.assertIn(expected_output, output.getvalue().strip()) From eeddc4bf308dc2bae33bb4e5b51afad63571f714 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 12 Aug 2024 12:53:54 -0400 Subject: [PATCH 9/9] Remove unused import --- tests/cli_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index d050a2b..e87b7c0 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -7,7 +7,7 @@ from django.test.client import Client from django.test.utils import captured_stdout -from arches_references.models import List, ListItem +from arches_references.models import List from arches.app.utils.skos import SKOSReader from .test_settings import PROJECT_TEST_ROOT