Skip to content

Commit

Permalink
Create dataclass for reference datatype and improve validation #45 (#59)
Browse files Browse the repository at this point in the history
* Fix listid -> list_id

* Validate against multiValue configuration
  • Loading branch information
jacobtylerwalls authored Feb 11, 2025
1 parent 14a9ec4 commit db55989
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 83 deletions.
153 changes: 96 additions & 57 deletions arches_references/datatypes/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,62 @@
import uuid
from dataclasses import asdict, dataclass

from django.db.models.fields.json import JSONField
from django.utils.translation import get_language, gettext as _

from arches.app.datatypes.base import BaseDataType
from arches.app.models.models import Node
from arches.app.models.graph import GraphValidationError

from arches_references.models import ListItem


@dataclass(kw_only=True)
class ReferenceLabel:
id: uuid.UUID
value: str
language_id: str
valuetype_id: str
list_item_id: uuid.UUID


@dataclass(kw_only=True)
class Reference:
    """A reference value as stored by the reference datatype.

    Every field is required and must be passed by keyword.
    """

    # URI of the referenced item.
    uri: str
    # Labels attached to the referenced item.
    labels: list[ReferenceLabel]
    # Presumably the id of the controlled list the item belongs to; confirm
    # against ListItem.build_tile_value().
    list_id: uuid.UUID


class ReferenceDataType(BaseDataType):
rest_framework_model_field = JSONField(null=True)

def to_python(self, value):
    """Turn a raw value into a list of ``Reference`` instances.

    ``None`` is returned unchanged. Any other falsy value raises
    ``ValueError``. Each item of ``value`` is unpacked as keyword
    arguments for ``Reference``; a ``TypeError`` raised by the dataclass
    constructors (missing or unexpected keys) propagates to the caller.
    """
    if value is None:
        return None
    if not value:
        raise ValueError(_("Reference datatype value cannot be empty"))

    def build_reference(raw_reference):
        # Shallow copy so the caller's mapping is left untouched.
        kwargs = {**raw_reference}
        raw_labels = kwargs.get("labels")
        if raw_labels:
            kwargs["labels"] = [
                ReferenceLabel(**raw_label) for raw_label in raw_labels
            ]
        elif raw_labels == []:
            # Dropping the empty list makes Reference() report "labels"
            # as a missing required argument.
            kwargs.pop("labels")
        return Reference(**kwargs)

    return [build_reference(raw_reference) for raw_reference in value]

def serialize(self, value):
    """Convert a list of references into a list of plain dicts.

    ``Reference`` instances are expanded with ``asdict``; any other item
    is shallow-copied as a mapping. A value that is not a list is
    returned unchanged.
    """
    if not isinstance(value, list):
        return value

    serialized = []
    for reference in value:
        if isinstance(reference, Reference):
            serialized.append(asdict(reference))
        else:
            serialized.append({**reference})
    return serialized

def validate(
self,
value,
Expand All @@ -22,66 +67,60 @@ def validate(
strict=False,
**kwargs,
):
errors = []
title = _("Invalid Reference Datatype Value")
if value is None:
return errors

if type(value) == list and len(value):
for reference in value:
if "uri" in reference and len(reference["uri"]):
pass
else:
errors.append(
{
"type": "ERROR",
"message": _(
"Reference objects require a 'uri' property and corresponding value"
),
"title": title,
}
)
if "labels" in reference:
pref_label_languages = []
for label in reference["labels"]:
if not all(
key in label
for key in ("id", "value", "language_id", "valuetype_id")
):
errors.append(
{
"type": "ERROR",
"message": _(
"Reference labels require properties: id(uuid), value(string), language_id(e.g. 'en'), and valuetype_id(e.g. 'prefLabel')"
),
"title": title,
}
)
if label["valuetype_id"] == "prefLabel":
pref_label_languages.append(label["language_id"])

if len(set(pref_label_languages)) < len(pref_label_languages):
errors.append(
{
"type": "ERROR",
"message": _(
"A reference can have only one prefLabel per language"
),
"title": title,
}
)
else:
errors.append(
{
"type": "ERROR",
"message": _("Reference value must be a list of reference objects"),
"title": title,
}
)
return errors
try:
parsed = self.to_python(value)
self.validate_pref_labels(parsed)
self.validate_multivalue(parsed, node, nodeid)
except Exception as e:
return [self.transform_exception(e)]
return []

def validate_pref_labels(self, references: list[Reference]):
    """Ensure no reference has two prefLabels in the same language.

    Raises ``ValueError`` for the first reference whose prefLabel
    languages contain a duplicate.
    """
    for reference in references:
        languages = []
        for label in reference.labels:
            if label.valuetype_id == "prefLabel":
                languages.append(label.language_id)
        # A set collapses repeats, so a size mismatch means a duplicate.
        if len(languages) != len(set(languages)):
            raise ValueError(
                _("A reference can have only one prefLabel per language")
            )

def validate_multivalue(self, parsed, node, nodeid):
    """Reject multiple references on a node not configured for them.

    When ``node`` is falsy it is looked up by ``nodeid``. A bare
    ``ValueError`` is raised if both are falsy, and the check is skipped
    silently if no node with that id exists.
    """
    if not node and not nodeid:
        raise ValueError
    if not node:
        try:
            node = Node.objects.get(nodeid=nodeid)
        except Node.DoesNotExist:
            return

    allows_many = node.config.get("multiValue")
    if allows_many or len(parsed) <= 1:
        return
    raise ValueError(_("This node does not allow multiple references."))

@staticmethod
def transform_exception(e):
    """Convert an exception raised during validation into an error dict.

    Args:
        e: The exception caught while parsing or validating a value.

    Returns:
        A dict with "type", "message" and "title" keys. ``TypeError``s
        raised by the dataclass constructors are rewritten into localized
        messages, a ``ValueError`` contributes its first argument, and
        anything else is reported as an unknown error.
    """
    message = _("Unknown error")
    detail = e.args[0] if e.args else None
    # Only inspect string details: a non-string first argument would make
    # the substring checks below raise inside the error handler.
    if isinstance(e, TypeError) and isinstance(detail, str):
        # Localize the error raised by the dataclass constructor.
        # The template is translated first and filled in afterwards, so
        # the msgid looked up at runtime matches the extracted one.
        if "__init__() missing" in detail:
            message = _("Missing required value(s): {}").format(
                detail.split(": ")[-1]
            )
        elif "unexpected keyword argument" in detail:
            message = _("Unexpected value: {}").format(
                detail.split("argument ")[-1]
            )
    elif isinstance(e, ValueError) and e.args:
        message = detail
    return {
        "type": "ERROR",
        "message": message,
        "title": _("Invalid Reference Datatype Value"),
    }

def transform_value_for_tile(self, value, **kwargs):
list_id = kwargs.get("controlledList")
value = self.serialize(value)
if (
isinstance(value, list)
and isinstance(value[0], dict)
Expand Down
8 changes: 4 additions & 4 deletions arches_references/media/js/viewmodels/reference-select.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ define([
const newItem = selection.map(uri => {
return {
"labels": NAME_LOOKUP[uri].labels,
"listid": NAME_LOOKUP[uri]["listid"],
"list_id": NAME_LOOKUP[uri]["list_id"],
"uri": uri
};
});
Expand Down Expand Up @@ -90,7 +90,7 @@ define([
processResults: function(data) {
const items = data.items;
items.forEach(item => {
item["listid"] = item.id;
item["list_id"] = item.id;
item.id = item.uri;
item.disabled = item.guide;
item.labels = item.values.filter(val => self.isLabel(val));
Expand All @@ -111,7 +111,7 @@ define([

if (item.uri) {
const text = self.getPrefLabel(item.labels) || arches.translations.searching + '...';
NAME_LOOKUP[item.uri] = {"prefLabel": text, "labels": item.labels, "listid": item.list_id};
NAME_LOOKUP[item.uri] = {"prefLabel": text, "labels": item.labels, "list_id": item.list_id};
return indentation + text;
}
},
Expand All @@ -132,7 +132,7 @@ define([
NAME_LOOKUP[value.uri] = {
"prefLabel": self.getPrefLabel(value.labels),
"labels": value.labels,
"listid": value.listid
"list_id": value.list_id,
};
});

Expand Down
2 changes: 1 addition & 1 deletion arches_references/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def build_tile_value(self):
tile_value = {
"uri": self.uri or self.generate_uri(),
"labels": [label.serialize() for label in self.list_item_values.labels()],
"listid": str(self.list_id),
"list_id": str(self.list_id),
}
return tile_value

Expand Down
90 changes: 69 additions & 21 deletions tests/reference_datatype_tests.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import uuid
from types import SimpleNamespace

from django.test import TestCase
from arches.app.datatypes.datatypes import DataTypeFactory
from arches.app.models.tile import Tile
from arches_references.models import List, ListItem, ListItemValue
from django.test import TestCase
from types import SimpleNamespace

from tests.test_views import ListTests

# these tests can be run from the command line via
# python manage.py test tests.reference_datatype_tests --settings="tests.test_settings"
Expand All @@ -12,22 +16,41 @@ class ReferenceDataTypeTests(TestCase):

@classmethod
def setUpTestData(cls):
    """Build the test data by delegating to ``ListTests.setUpTestData``."""
    from tests.test_views import ListTests

    fixture_result = ListTests.setUpTestData()
    return fixture_result

def test_validate(self):
reference = DataTypeFactory().get_instance("reference")

for value in [
"",
[],
[{}], # reference has no 'uri'
[{"uri": ""}], # reference uri is empty
mock_node = SimpleNamespace(config={"multiValue": False})

for value, message in [
("", "Reference datatype value cannot be empty"),
([], "Reference datatype value cannot be empty"),
([{}], "Missing required value(s): 'uri', 'labels', and 'list_id'"),
(
[
{
"uri": "",
"labels": [], # notice [] rather than None
"list_id": str(uuid.uuid4()),
}
],
"Missing required value(s): 'labels'",
),
(
[
{
"uri": "https://www.domain.com/123",
"labels": [],
"garbage_key": "garbage_value",
}
],
"Unexpected value: 'garbage_key'",
),
]:
with self.subTest(reference_value=value):
errors = reference.validate(value)
self.assertTrue(len(errors) > 0)
errors = reference.validate(value, node=mock_node)
self.assertEqual(len(errors), 1, errors)
self.assertEqual(errors[0]["message"], message)

data = {
"uri": "https://www.domain.com/label",
Expand All @@ -36,30 +59,45 @@ def test_validate(self):
"id": "23b4efbd-2e46-4b3f-8d75-2f3b2bb96af2",
"value": "label",
"language_id": "en",
"list_item_id": str(uuid.uuid4()),
"valuetype_id": "prefLabel",
},
{
"id": "e8676242-f0c7-4e3d-b031-fded4960cd86",
"language_id": "de",
"list_item_id": str(uuid.uuid4()),
"valuetype_id": "prefLabel",
},
],
"list_id": uuid.uuid4(),
}

errors = reference.validate(value=[data]) # label missing value property
self.assertIsNotNone(errors)
# Label missing value property
errors = reference.validate(value=[data], node=mock_node)
self.assertEqual(len(errors), 1, errors)

data["labels"][1]["value"] = "a label"
data["labels"][1]["language_id"] = "en"

errors = reference.validate(value=[data]) # too many prefLabels per language
self.assertIsNotNone(errors)
# Too many prefLabels per language
errors = reference.validate(value=[data], node=mock_node)
self.assertEqual(len(errors), 1, errors)

data["labels"][1]["value"] = "ein label"
data["labels"][1]["language_id"] = "de"
data["labels"][1]["list_item_id"] = str(uuid.uuid4())

# Valid
errors = reference.validate(value=[data], node=mock_node)
self.assertEqual(errors, [])

# Too many references
errors = reference.validate(value=[data, data], node=mock_node)
self.assertEqual(len(errors), 1, errors)

errors = reference.validate(value=[data]) # data should be valid
self.assertTrue(len(errors) == 0)
# User error (missing arguments)
errors = reference.validate(value=[data])
self.assertEqual(len(errors), 1, errors)

def test_tile_clean(self):
reference = DataTypeFactory().get_instance("reference")
Expand All @@ -74,9 +112,10 @@ def test_tile_clean(self):
"value": "label",
"language_id": "en",
"valuetype_id": "prefLabel",
"list_item_id": str(uuid.uuid4()),
},
],
"listid": "fd9508dc-2aab-4c46-85ae-dccce1200035",
"list_id": "fd9508dc-2aab-4c46-85ae-dccce1200035",
}
]

Expand All @@ -96,6 +135,15 @@ def test_tile_clean(self):
reference.clean(tile1, nodeid)
self.assertIsNone(tile1.data[nodeid])

def test_dataclass_roundtrip(self):
    """A tile value survives conversion to dataclasses and back."""
    datatype = DataTypeFactory().get_instance("reference")
    controlled_list = List.objects.get(name="list1")
    config = {"controlledList": str(controlled_list.pk)}

    original_tile_value = datatype.transform_value_for_tile(
        "label1-pref", **config
    )
    references = datatype.to_python(original_tile_value)
    roundtripped = datatype.transform_value_for_tile(references, **config)

    self.assertEqual(roundtripped, original_tile_value)

def test_transform_value_for_tile(self):
reference = DataTypeFactory().get_instance("reference")
list1_pk = str(List.objects.get(name="list1").pk)
Expand All @@ -105,7 +153,7 @@ def test_transform_value_for_tile(self):
self.assertTrue(isinstance(tile_value1, list))
self.assertTrue("uri" in tile_value1[0])
self.assertTrue("labels" in tile_value1[0])
self.assertTrue("listid" in tile_value1[0])
self.assertTrue("list_id" in tile_value1[0])

self.assertIsNone(reference.transform_value_for_tile(None, **config))

Expand Down Expand Up @@ -154,7 +202,7 @@ def test_get_display_value(self):
"valuetype_id": "prefLabel",
},
],
"listid": "a8da34eb-575b-498c-ada7-161ee745fd16",
"list_id": "a8da34eb-575b-498c-ada7-161ee745fd16",
}
]
},
Expand Down

0 comments on commit db55989

Please sign in to comment.