Skip to content

Commit

Permalink
Stash RDM to CLM cli and tests #11070
Browse files Browse the repository at this point in the history
  • Loading branch information
johnatawnclementawn committed Jun 29, 2024
1 parent 79cf887 commit 5be923f
Show file tree
Hide file tree
Showing 2 changed files with 181 additions and 0 deletions.
120 changes: 120 additions & 0 deletions arches/management/commands/etl_processes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from arches.app.models.models import Concept, Value
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """
    Commands for running Arches ETL processes
    """

    def add_arguments(self, parser):
        """Register the command-line options accepted by this command."""
        parser.add_argument(
            "-o",
            "--operation",
            action="store",
            dest="operation",
            required=True,
            choices=["migrate_collections_to_controlled_lists"],
            help="The operation to perform",
        )

        parser.add_argument(
            "-co",
            "--collections",
            action="store",
            dest="collections_to_migrate",
            nargs="*",
            required=True,
            help="One or more collections to migrate to controlled lists",
        )

        parser.add_argument(
            "-ho",
            "--host",
            action="store",
            dest="host",
            default="http://localhost:8000/plugins/controlled-list-manager/item/",
            help="Provide a host for URI generation. Default is localhost",
        )

        parser.add_argument(
            "-ow",
            "--overwrite",
            action="store_true",
            dest="overwrite",
            default=False,
            help="Overwrite the entire controlled list and its list items/values. Default false.",
        )

        parser.add_argument(
            "-psl",
            "--preferred_sort_language",
            action="store",
            dest="preferred_sort_language",
            default="en",
            help="The language to use for sorting preferred labels. Default 'en'",
        )

    def handle(self, *args, **options):
        """Dispatch to the method implementing the requested operation."""
        if options["operation"] == "migrate_collections_to_controlled_lists":
            self.migrate_collections_to_controlled_lists(
                collections_to_migrate=options["collections_to_migrate"],
                host=options["host"],
                overwrite=options["overwrite"],
                preferred_sort_language=options["preferred_sort_language"],
            )

    def migrate_collections_to_controlled_lists(
        self,
        collections_to_migrate,
        host,
        overwrite,
        preferred_sort_language,
    ):
        """
        Uses a postgres function to migrate collections to controlled lists

        Names in ``collections_to_migrate`` that match no prefLabel or
        identifier value of a Collection concept are reported on stdout and
        skipped. The matching names are passed to the
        ``__arches_migrate_collections_to_clm`` database function, and the
        first column of the row it returns is written to stdout.

        Example usage:
            python manage.py etl_processes
                -o migrate_collections_to_controlled_lists
                -co 'Johns list' 'Getty AAT'
                -ho 'http://localhost:8000/plugins/controlled-list-manager/item/'
                -psl 'fr'
                -ow
        """

        collections_in_db = list(
            Value.objects.filter(
                value__in=collections_to_migrate,
                valuetype__in=["prefLabel", "identifier"],
                concept__nodetype="Collection",
            ).values_list("value", flat=True)
        )

        # Set membership keeps the "not found" check linear in the number of
        # requested collections; the reported order still follows the input.
        found_names = set(collections_in_db)
        failed_collections = [
            collection
            for collection in collections_to_migrate
            if collection not in found_names
        ]

        if failed_collections:
            self.stdout.write(
                "Failed to find the following collections in the database: %s"
                % ", ".join(failed_collections)
            )

        if collections_in_db:
            from django.db import connection

            # The context manager closes the cursor even if the database
            # function raises, instead of leaving it open until collected.
            with connection.cursor() as cursor:
                # NOTE(review): collections_in_db is bound as a single list
                # parameter inside ARRAY[...]; confirm the resulting array
                # shape is what the database function expects.
                cursor.execute(
                    """
                    select * from __arches_migrate_collections_to_clm(
                        ARRAY[%s], %s, %s::boolean, %s
                    );
                    """,
                    [collections_in_db, host, overwrite, preferred_sort_language],
                )
                result = cursor.fetchone()
            self.stdout.write(result[0])
61 changes: 61 additions & 0 deletions tests/commands/rdm_to_controlled_lists_etl_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import io
from django.core import management
from django.urls import reverse
from django.test.client import Client

from arches.app.models.models import (
ControlledList,
ControlledListItem,
ControlledListItemImage,
ControlledListItemImageMetadata,
ControlledListItemValue,
DValueType,
Language,
)
from arches.app.utils.skos import SKOSReader

from tests.base_test import ArchesTestCase


# these tests can be run from the command line via
# python manage.py test tests.commands.rdm_to_controlled_lists_etl_tests --settings="tests.test_settings"


class RDMToControlledListsETLTests(ArchesTestCase):
    """Tests for the etl_processes collection-to-controlled-list migration."""

    @classmethod
    def setUpTestData(cls):
        # Load the SKOS fixture and save its concepts.
        reader = SKOSReader()
        graph = reader.read_file(
            "tests/fixtures/data/concept_label_test_collection.xml"
        )
        reader.save_concepts_from_skos(graph)

        # Request the make_collection view for the fixture concept as admin;
        # presumably this creates the collection the command later migrates.
        admin_client = Client()
        admin_client.login(username="admin", password="admin")
        make_collection_url = reverse(
            "make_collection",
            kwargs={"conceptid": "7c90899a-dbe9-4574-9175-e69481a80b3c"},
        )
        admin_client.get(make_collection_url)

    def test_migrate_collections_to_controlled_lists(self):
        # Run the management command with its stdout captured in memory.
        captured = io.StringIO()
        command_options = {
            "operation": "migrate_collections_to_controlled_lists",
            "collections_to_migrate": ["Concept Label Import Test"],
            "host": "http://localhost:8000/plugins/controlled-list-manager/item/",
            "preferred_sort_language": "en",
            "overwrite": False,
        }
        management.call_command("etl_processes", stdout=captured, **command_options)

        # A controlled list named after the collection must now exist ...
        migrated_lists = ControlledList.objects.filter(
            name="Concept Label Import Test"
        )
        self.assertTrue(migrated_lists.exists())

        # ... and so must the list item with this known id.
        migrated_items = ControlledListItem.objects.filter(
            id="89ff530a-f350-44f0-ac88-bdd8904eb57e"
        )
        self.assertTrue(migrated_items.exists())

0 comments on commit 5be923f

Please sign in to comment.