From 6f5249a8b50194d5b6f94c8fd0094f4fbddd4ee3 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 00:51:46 +0200
Subject: [PATCH 01/14] Add data integrity tests for IRIs

---
 tests/test_data.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/test_data.py b/tests/test_data.py
index d0257572e..9551fb39a 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -4,6 +4,7 @@
 
 import logging
 import unittest
+from collections import defaultdict
 
 import bioregistry
 from bioregistry.export.rdf_export import resource_to_rdf_str
@@ -291,3 +292,26 @@ def test_get_rdf(self):
         """Test conversion to RDF."""
         s = resource_to_rdf_str("chebi")
         self.assertIsInstance(s, str)
+
+    def test_unique_iris(self):
+        """Test that all IRIs are unique, or at least there's a mapping to which one is the preferred prefix."""
+        prefix_map = bioregistry.get_format_urls()
+        dd = defaultdict(list)
+        for prefix, iri in prefix_map.items():
+            dd[iri].append(prefix)
+
+        x = {}
+        for iri, prefixes in dd.items():
+            if 1 == len(prefixes):
+                continue
+            resources = {prefix: bioregistry.get_resource(prefix) for prefix in prefixes}
+            parts = {prefix: resource.part_of for prefix, resource in resources.items()}
+            unmapped = [
+                prefix
+                for prefix, part_of in parts.items()
+                if part_of is None
+            ]
+            if len(unmapped) <= 1:
+                continue
+            x[iri] = parts
+        self.assertEqual({}, x)

From 7e0ba379112e95e1a06c84d9ab1ef7354062725c Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 01:02:41 +0200
Subject: [PATCH 02/14] Update test

---
 tests/test_data.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/tests/test_data.py b/tests/test_data.py
index 9551fb39a..b334b79e9 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -296,15 +296,22 @@ def test_get_rdf(self):
     def test_unique_iris(self):
         """Test that all IRIs are unique, or at least there's a mapping to which one is the preferred prefix."""
         prefix_map = bioregistry.get_format_urls()
-        dd = defaultdict(list)
+        dd = defaultdict(dict)
         for prefix, iri in prefix_map.items():
-            dd[iri].append(prefix)
+            resource = bioregistry.get_resource(prefix)
+            self.assertIsNotNone(resource)
+            if resource.provides is not None:
+                # Don't consider resources that are providing, such as `ctd.gene`
+                continue
+            dd[iri][prefix] = resource
 
         x = {}
-        for iri, prefixes in dd.items():
-            if 1 == len(prefixes):
+        for iri, resources in dd.items():
+            if 1 == len(resources):
+                # This is a unique IRI, so no issues
                 continue
-            resources = {prefix: bioregistry.get_resource(prefix) for prefix in prefixes}
+
+            # Get parts
             parts = {prefix: resource.part_of for prefix, resource in resources.items()}
             unmapped = [
                 prefix
@@ -313,5 +320,6 @@ def test_unique_iris(self):
             ]
             if len(unmapped) <= 1:
                 continue
+
             x[iri] = parts
         self.assertEqual({}, x)

From 22580a069786d262acf1fd226312024ea34a548c Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 01:02:48 +0200
Subject: [PATCH 03/14] Add additional curations

---
 src/bioregistry/data/bioregistry.json | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index d799e8238..1f851b0f1 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -4326,6 +4326,7 @@
       "prefix": "cath.superfamily",
       "provider_url": "http://www.cathdb.info/cathnode/$1"
     },
+    "part_of": "cath",
     "prefixcommons": {
       "formatter": "http://identifiers.org/cath.superfamily/$1",
       "is_identifiers": true,
@@ -9346,7 +9347,8 @@
       "prefix": "dpo",
       "version": "2021-09-02",
       "version.iri": "http://purl.obolibrary.org/obo/dpo/releases/2021-09-02/dpo.owl"
-    }
+    },
+    "part_of": "fbcv"
   },
   "dpv": {
     "mappings": {
@@ -14072,7 +14074,8 @@
       "is_identifiers": false,
       "is_obo": true,
       "prefix": "GEO"
-    }
+    },
+    "url": "http://purl.obolibrary.org/obo/GEO_$1"
   },
   "gexo": {
     "bioportal": {
@@ -35063,7 +35066,8 @@
       "provider_url": "https://ccg.epfl.ch/cgi-bin/snp2tfbs/snpviewer_form_parser.cgi?snpid=$1",
       "sampleId": "rs11603840"
     },
-    "name": "SNP to Transcription Factor Binding Sites"
+    "name": "SNP to Transcription Factor Binding Sites",
+    "provides": "dbsnp"
   },
   "so": {
     "bioportal": {

From 8d820b29eefdef1e01414babeab3258096f797b8 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 01:13:35 +0200
Subject: [PATCH 04/14] Update test_data.py

---
 tests/test_data.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/test_data.py b/tests/test_data.py
index b334b79e9..c1f7ffc2c 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -313,11 +313,7 @@ def test_unique_iris(self):
 
             # Get parts
             parts = {prefix: resource.part_of for prefix, resource in resources.items()}
-            unmapped = [
-                prefix
-                for prefix, part_of in parts.items()
-                if part_of is None
-            ]
+            unmapped = [prefix for prefix, part_of in parts.items() if part_of is None]
             if len(unmapped) <= 1:
                 continue
 

From 0a46148e28099d199e9361c55eb55aa89d796185 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 01:13:38 +0200
Subject: [PATCH 05/14] Update bioregistry.json

---
 src/bioregistry/data/bioregistry.json | 68 +++++++++------------------
 1 file changed, 23 insertions(+), 45 deletions(-)

diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index 1f851b0f1..d515a0b28 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -29210,7 +29210,8 @@
       "prefix": "ped.ensemble",
       "provider_url": "https://proteinensemble.org/$1",
       "sampleId": "PED00017e001"
-    }
+    },
+    "part_of": "ped"
   },
   "peff": {
     "deprecated": true,
@@ -39281,6 +39282,12 @@
       "prefix": "wb",
       "provider_url": "https://www.wormbase.org/get?name=$1"
     },
+    "ncbi": {
+      "example": "R13H7",
+      "homepage": "http://www.wormbase.org/",
+      "name": "Caenorhabditis elegans Genome Database",
+      "prefix": "WormBase"
+    },
     "prefixcommons": {
       "formatter": "http://identifiers.org/wb/$1",
       "is_identifiers": true,
@@ -39288,8 +39295,21 @@
       "prefix": "WB"
     },
     "synonyms": [
-      "WB_REF"
-    ]
+      "WB_REF",
+      "wormbase"
+    ],
+    "uniprot": {
+      "category": "Organism-specific databases",
+      "formatter": "https://wormbase.org/db/seq/protein?name=%s;class=CDS",
+      "identifier": "110",
+      "link_is_explicit": "true",
+      "name": "WormBase",
+      "prefix": "WormBase"
+    },
+    "wikidata": {
+      "database": "Q3570042",
+      "prefix": "P3860"
+    }
   },
   "wb.rnai": {
     "mappings": {
@@ -39758,48 +39778,6 @@
       "prefix": "WORFDB"
     }
   },
-  "wormbase": {
-    "example": "C05G5/12462-12364",
-    "go": {
-      "formatter": "http://www.wormbase.org/get?name=$1",
-      "homepage": "http://www.wormbase.org/",
-      "name": "WormBase database of nematode biology",
-      "prefix": "WB_REF"
-    },
-    "homepage": "https://wormbase.org",
-    "mappings": {
-      "go": "WB_REF",
-      "ncbi": "WormBase",
-      "prefixcommons": "WormBase",
-      "uniprot": "WormBase",
-      "uniprot.database": "WormBase"
-    },
-    "name": "WormBase",
-    "ncbi": {
-      "example": "R13H7",
-      "homepage": "http://www.wormbase.org/",
-      "name": "Caenorhabditis elegans Genome Database",
-      "prefix": "WormBase"
-    },
-    "prefixcommons": {
-      "formatter": "https://www.wormbase.org/get?name=$1",
-      "is_identifiers": false,
-      "is_obo": false,
-      "prefix": "WormBase"
-    },
-    "uniprot": {
-      "category": "Organism-specific databases",
-      "formatter": "https://wormbase.org/db/seq/protein?name=%s;class=CDS",
-      "identifier": "110",
-      "link_is_explicit": "true",
-      "name": "WormBase",
-      "prefix": "WormBase"
-    },
-    "wikidata": {
-      "database": "Q3570042",
-      "prefix": "P3860"
-    }
-  },
   "wormpep": {
     "mappings": {
       "miriam": "wormpep",

From ab0b2730c55cd0d1e6522ef80f32c641e2df4a4c Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 01:27:16 +0200
Subject: [PATCH 06/14] Add canonical mapping

The `pmapto` property name is a placeholder and needs a better name.
---
 src/bioregistry/data/bioregistry.json | 3 +++
 src/bioregistry/schema/struct.py      | 2 ++
 tests/test_data.py                    | 9 ++++++++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index d515a0b28..0f59fae1a 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -18280,6 +18280,7 @@
       "prefix": "insdc.cds",
       "provider_url": "https://www.ncbi.nlm.nih.gov/protein/$1"
     },
+    "pmapto": "ncbiprotein",
     "prefixcommons": {
       "formatter": "http://identifiers.org/insdc.cds/$1",
       "is_identifiers": true,
@@ -18334,6 +18335,7 @@
     "homepage": "https://www.insdc.org/",
     "name": "International Nucleotide Sequence Database Collaboration (INSDC) Run",
     "pattern": "^(E|D|S)RR[0-9]{6,}$",
+    "pmapto": "ena.embl",
     "url": "https://www.ebi.ac.uk/ena/browser/view/$1"
   },
   "insdc.sra": {
@@ -32487,6 +32489,7 @@
       "provider_url": "https://www.ncbi.nlm.nih.gov/protein/$1"
     },
     "name": "Reference Sequence Collection",
+    "pmapto": "ncbiprotein",
     "prefixcommons": {
       "formatter": "http://www.ncbi.nlm.nih.gov/refseq/?term=$1",
       "is_identifiers": false,
diff --git a/src/bioregistry/schema/struct.py b/src/bioregistry/schema/struct.py
index 83eef3c9a..93300031f 100644
--- a/src/bioregistry/schema/struct.py
+++ b/src/bioregistry/schema/struct.py
@@ -116,6 +116,8 @@ class Resource(BaseModel):
     contributor: Optional[Author]
     #: Set to true if this database is proprietary. If missing, assume it's not.
     proprietary: Optional[bool]
+    #: If this shares an IRI with another entry, maps to which should be used
+    pmapto: Optional[str]
 
     # Registry-specific data
     miriam: Optional[Mapping[str, Any]]
diff --git a/tests/test_data.py b/tests/test_data.py
index c1f7ffc2c..08dfd1dfa 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -317,5 +317,12 @@ def test_unique_iris(self):
             if len(unmapped) <= 1:
                 continue
 
-            x[iri] = parts
+            # Get pmaps
+            pmaptos = {prefix: resource.pmapto for prefix, resource in resources.items()}
+            canonical = [prefix for prefix, pmapto in pmaptos.items() if pmapto is None]
+            targets = list({pmapto for prefix, pmapto in pmaptos.items() if pmapto is not None})
+            if len(canonical) == 1 and len(targets) == 1 and canonical[0] == targets[0]:
+                continue
+
+            x[iri] = canonical, targets
         self.assertEqual({}, x)

From cefe6a0d9ce457216a8f6cbe6988beb82db4a814 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 10:46:37 +0200
Subject: [PATCH 07/14] Add more curations

---
 src/bioregistry/data/bioregistry.json | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index 0f59fae1a..f7dc71d68 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -4362,6 +4362,7 @@
       "prefix": "cattleqtldb",
       "provider_url": "http://www.animalgenome.org/cgi-bin/QTLdb/BT/qdetails?QTL_ID=$1"
     },
+    "part_of": "qtldb",
     "prefixcommons": {
       "formatter": "http://identifiers.org/cattleqtldb/$1",
       "is_identifiers": true,
@@ -5449,6 +5450,7 @@
       "prefix": "chickenqtldb",
       "provider_url": "http://www.animalgenome.org/cgi-bin/QTLdb/GG/qdetails?QTL_ID=$1"
     },
+    "part_of": "qtldb",
     "prefixcommons": {
       "formatter": "http://identifiers.org/chickenqtldb/$1",
       "is_identifiers": true,
@@ -14244,6 +14246,7 @@
     }
   },
   "glycomedb": {
+    "comment": "this is exactly the same as glytoucan. Idk why there are two different ones",
     "mappings": {
       "miriam": "glycomedb",
       "n2t": "glycomedb",
@@ -14271,6 +14274,7 @@
       "prefix": "glycomedb",
       "provider_url": "https://glytoucan.org/Structures/Glycans/$1"
     },
+    "pmapto": "glytoucan",
     "prefixcommons": {
       "formatter": "http://identifiers.org/glycomedb/$1",
       "is_identifiers": true,
@@ -24094,6 +24098,13 @@
       "version": "1.0.1"
     }
   },
+  "multicellds": {
+    "description": "MultiCellDS is a data standard for multicellular simulation, experimental, and clinical data. A digital cell line is a hierarchical organization of quantitative phenotype data for a single biological cell line, including the microenvironmental context of the measurements and essential metadata.",
+    "example": "MCDS_S_0000000001",
+    "homepage": "http://multicellds.org/MultiCellDB.php",
+    "name": "MultiCellDS",
+    "url": "http://multicellds.org/MultiCellDB/$1"
+  },
   "multicellds.cell_line": {
     "mappings": {
       "miriam": "multicellds.cell_line",
@@ -24122,6 +24133,7 @@
       "prefix": "multicellds.cell_line",
       "provider_url": "http://multicellds.org/MultiCellDB/$1"
     },
+    "part_of": "multicellds",
     "prefixcommons": {
       "formatter": "http://identifiers.org/multicellds.cell_line/$1",
       "is_identifiers": true,
@@ -24157,6 +24169,7 @@
       "prefix": "multicellds.collection",
       "provider_url": "http://multicellds.org/MultiCellDB/$1"
     },
+    "part_of": "multicellds",
     "prefixcommons": {
       "formatter": "http://identifiers.org/multicellds.collection/$1",
       "is_identifiers": true,
@@ -24192,6 +24205,7 @@
       "prefix": "multicellds.snapshot",
       "provider_url": "http://multicellds.org/MultiCellDB/$1"
     },
+    "part_of": "multicellds",
     "prefixcommons": {
       "formatter": "http://identifiers.org/multicellds.snapshot/$1",
       "is_identifiers": true,
@@ -30073,6 +30087,7 @@
       "prefix": "pigqtldb",
       "provider_url": "http://www.animalgenome.org/cgi-bin/QTLdb/SS/qdetails?QTL_ID=$1"
     },
+    "part_of": "qtldb",
     "prefixcommons": {
       "formatter": "http://identifiers.org/pigqtldb/$1",
       "is_identifiers": true,
@@ -34470,6 +34485,7 @@
       "prefix": "sheepqtldb",
       "provider_url": "http://www.animalgenome.org/cgi-bin/QTLdb/OA/qdetails?QTL_ID=$1"
     },
+    "part_of": "qtldb",
     "prefixcommons": {
       "formatter": "http://identifiers.org/sheepqtldb/$1",
       "is_identifiers": true,
@@ -36137,6 +36153,7 @@
     }
   },
   "tair.gene": {
+    "example": "2200934",
     "mappings": {
       "miriam": "tair.gene",
       "n2t": "tair.gene",
@@ -36164,12 +36181,14 @@
       "prefix": "tair.gene",
       "provider_url": "http://arabidopsis.org/servlets/TairObject?accession=$1"
     },
+    "pattern": "^\\d{7}$",
     "prefixcommons": {
       "formatter": "http://identifiers.org/tair.gene/$1",
       "is_identifiers": true,
       "is_obo": false,
       "prefix": "TAIR.GENE"
-    }
+    },
+    "url": "http://arabidopsis.org/servlets/TairObject?accession=Gene:$1"
   },
   "tair.locus": {
     "go": {
@@ -36220,6 +36239,7 @@
     }
   },
   "tair.protein": {
+    "example": "1009107926",
     "mappings": {
       "miriam": "tair.protein",
       "n2t": "tair.protein",
@@ -36247,12 +36267,14 @@
       "prefix": "tair.protein",
       "provider_url": "http://arabidopsis.org/servlets/TairObject?accession=$1"
     },
+    "pattern": "^\\d{10}$",
     "prefixcommons": {
       "formatter": "http://identifiers.org/tair.protein/$1",
       "is_identifiers": true,
       "is_obo": false,
       "prefix": "TAIR.PROTEIN"
-    }
+    },
+    "url": "http://arabidopsis.org/servlets/TairObject?accession=AASequence:$1"
   },
   "tao": {
     "bioportal": {

From cc56d6c6333f1fb0f97820c504d5824c84560adb Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 10:50:51 +0200
Subject: [PATCH 08/14] Pass tests

---
 src/bioregistry/data/bioregistry.json | 2 ++
 tests/test_data.py                    | 1 +
 2 files changed, 3 insertions(+)

diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index f7dc71d68..bd3f1ed72 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -28961,6 +28961,7 @@
     }
   },
   "pdb-ccd": {
+    "comment": "might be same as pdb.ligand, not sure though",
     "mappings": {
       "miriam": "pdb-ccd",
       "n2t": "pdb-ccd",
@@ -28988,6 +28989,7 @@
       "prefix": "pdb-ccd",
       "provider_url": "https://www.ebi.ac.uk/pdbe-srv/pdbechem/chemicalCompound/show/$1"
     },
+    "pmapto": "pdb.ligand",
     "prefixcommons": {
       "formatter": "http://identifiers.org/pdb-ccd/$1",
       "is_identifiers": true,
diff --git a/tests/test_data.py b/tests/test_data.py
index 08dfd1dfa..52d465dae 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -59,6 +59,7 @@ def test_keys(self):
             "comment",
             "contributor",
             "proprietary",
+            "pmapto",
         }
         keys.update(bioregistry.read_metaregistry())
         for prefix, entry in self.registry.items():

From aada6ca17c6168f58739675505b292d2995d8831 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 10:57:52 +0200
Subject: [PATCH 09/14] Give better name to property

---
 src/bioregistry/data/bioregistry.json | 10 +++++-----
 src/bioregistry/schema/struct.py      |  4 ++--
 tests/test_data.py                    | 14 +++++++-------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json
index bd3f1ed72..1eec7e11f 100644
--- a/src/bioregistry/data/bioregistry.json
+++ b/src/bioregistry/data/bioregistry.json
@@ -14247,6 +14247,7 @@
   },
   "glycomedb": {
     "comment": "this is exactly the same as glytoucan. Idk why there are two different ones",
+    "has_canonical": "glytoucan",
     "mappings": {
       "miriam": "glycomedb",
       "n2t": "glycomedb",
@@ -14274,7 +14275,6 @@
       "prefix": "glycomedb",
       "provider_url": "https://glytoucan.org/Structures/Glycans/$1"
     },
-    "pmapto": "glytoucan",
     "prefixcommons": {
       "formatter": "http://identifiers.org/glycomedb/$1",
       "is_identifiers": true,
@@ -18257,6 +18257,7 @@
     }
   },
   "insdc.cds": {
+    "has_canonical": "ncbiprotein",
     "mappings": {
       "miriam": "insdc.cds",
       "n2t": "insdc.cds",
@@ -18284,7 +18285,6 @@
       "prefix": "insdc.cds",
       "provider_url": "https://www.ncbi.nlm.nih.gov/protein/$1"
     },
-    "pmapto": "ncbiprotein",
     "prefixcommons": {
       "formatter": "http://identifiers.org/insdc.cds/$1",
       "is_identifiers": true,
@@ -18336,10 +18336,10 @@
     },
     "description": "An experimental run, served thrugh the ENA",
     "example": "ERR436051",
+    "has_canonical": "ena.embl",
     "homepage": "https://www.insdc.org/",
     "name": "International Nucleotide Sequence Database Collaboration (INSDC) Run",
     "pattern": "^(E|D|S)RR[0-9]{6,}$",
-    "pmapto": "ena.embl",
     "url": "https://www.ebi.ac.uk/ena/browser/view/$1"
   },
   "insdc.sra": {
@@ -28962,6 +28962,7 @@
   },
   "pdb-ccd": {
     "comment": "might be same as pdb.ligand, not sure though",
+    "has_canonical": "pdb.ligand",
     "mappings": {
       "miriam": "pdb-ccd",
       "n2t": "pdb-ccd",
@@ -28989,7 +28990,6 @@
       "prefix": "pdb-ccd",
       "provider_url": "https://www.ebi.ac.uk/pdbe-srv/pdbechem/chemicalCompound/show/$1"
     },
-    "pmapto": "pdb.ligand",
     "prefixcommons": {
       "formatter": "http://identifiers.org/pdb-ccd/$1",
       "is_identifiers": true,
@@ -32475,6 +32475,7 @@
       "name": "RefSeq",
       "prefix": "RefSeq"
     },
+    "has_canonical": "ncbiprotein",
     "mappings": {
       "go": "RefSeq",
       "miriam": "refseq",
@@ -32506,7 +32507,6 @@
       "provider_url": "https://www.ncbi.nlm.nih.gov/protein/$1"
     },
     "name": "Reference Sequence Collection",
-    "pmapto": "ncbiprotein",
     "prefixcommons": {
       "formatter": "http://www.ncbi.nlm.nih.gov/refseq/?term=$1",
       "is_identifiers": false,
diff --git a/src/bioregistry/schema/struct.py b/src/bioregistry/schema/struct.py
index 93300031f..512b985e3 100644
--- a/src/bioregistry/schema/struct.py
+++ b/src/bioregistry/schema/struct.py
@@ -116,8 +116,8 @@ class Resource(BaseModel):
     contributor: Optional[Author]
     #: Set to true if this database is proprietary. If missing, assume it's not.
     proprietary: Optional[bool]
-    #: If this shares an IRI with another entry, maps to which should be used
-    pmapto: Optional[str]
+    #: If this shares an IRI with another entry, maps to the prefix that should be considered canonical
+    has_canonical: Optional[str]
 
     # Registry-specific data
     miriam: Optional[Mapping[str, Any]]
diff --git a/tests/test_data.py b/tests/test_data.py
index 52d465dae..d86ecbd04 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -59,7 +59,7 @@ def test_keys(self):
             "comment",
             "contributor",
             "proprietary",
-            "pmapto",
+            "has_canonical",
         }
         keys.update(bioregistry.read_metaregistry())
         for prefix, entry in self.registry.items():
@@ -318,12 +318,12 @@ def test_unique_iris(self):
             if len(unmapped) <= 1:
                 continue
 
-            # Get pmaps
-            pmaptos = {prefix: resource.pmapto for prefix, resource in resources.items()}
-            canonical = [prefix for prefix, pmapto in pmaptos.items() if pmapto is None]
-            targets = list({pmapto for prefix, pmapto in pmaptos.items() if pmapto is not None})
-            if len(canonical) == 1 and len(targets) == 1 and canonical[0] == targets[0]:
+            # Get canonical
+            canonicals = {prefix: resource.has_canonical for prefix, resource in resources.items()}
+            canonical_target = [prefix for prefix, target in canonicals.items() if target is None]
+            all_targets = list({target for prefix, target in canonicals.items() if target is not None})
+            if len(canonical_target) == 1 and len(all_targets) == 1 and canonical_target[0] == all_targets[0]:
                 continue
 
-            x[iri] = canonical, targets
+            x[iri] = parts, unmapped, canonical_target, all_targets
         self.assertEqual({}, x)

From c926e3552773110f06775eccea1360fae2dfc1b8 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 11:02:02 +0200
Subject: [PATCH 10/14] Add RDF schema annotations

---
 src/bioregistry/export/rdf_export.py | 4 ++++
 src/bioregistry/schema/constants.py  | 2 ++
 src/bioregistry/schema/schema.json   | 4 ++++
 3 files changed, 10 insertions(+)

diff --git a/src/bioregistry/export/rdf_export.py b/src/bioregistry/export/rdf_export.py
index c30ce4008..b0c83491e 100644
--- a/src/bioregistry/export/rdf_export.py
+++ b/src/bioregistry/export/rdf_export.py
@@ -187,6 +187,10 @@ def _add_resource(data, *, graph: Optional[rdflib.Graph] = None) -> Tuple[rdflib
     if provides:
         graph.add((node, bioregistry_schema["providesFor"], bioregistry_resource[provides]))
 
+    canonical = data.get("has_canonical")
+    if canonical:
+        graph.add((node, bioregistry_schema["hasCanonical"], bioregistry_resource[canonical]))
+
     # TODO add contributor if it's available
 
     graph.add(
diff --git a/src/bioregistry/schema/constants.py b/src/bioregistry/schema/constants.py
index c530f76d5..3de605107 100644
--- a/src/bioregistry/schema/constants.py
+++ b/src/bioregistry/schema/constants.py
@@ -40,6 +40,8 @@
     "hasMapping": "A property whose subject is a resource and object is a mapping",
     "hasRegistry": "A property whose subject is a mapping and object is a metaresource.",
     "hasMetaidentifier": "A property whose subject is a mapping and object is an identifier string.",
+    "hasCanonical": "A property connecting two prefixes that share an IRI where the subject is "
+    "the non-preferred prefix and the target is the preferred prefix",
 }
 bioregistry_collection = rdflib.namespace.Namespace("https://bioregistry.io/collection/")
 bioregistry_resource = rdflib.namespace.Namespace("https://bioregistry.io/registry/")
diff --git a/src/bioregistry/schema/schema.json b/src/bioregistry/schema/schema.json
index 6c87344cf..4073ac2fb 100644
--- a/src/bioregistry/schema/schema.json
+++ b/src/bioregistry/schema/schema.json
@@ -181,6 +181,10 @@
           "title": "Proprietary",
           "type": "boolean"
         },
+        "has_canonical": {
+          "title": "Has Canonical",
+          "type": "string"
+        },
         "miriam": {
           "title": "Miriam",
           "type": "object"

From 9d62fc00b71ef13005976322154704565abc4dc4 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 11:02:05 +0200
Subject: [PATCH 11/14] Lint

---
 tests/test_data.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/test_data.py b/tests/test_data.py
index d86ecbd04..7e792b111 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -321,8 +321,14 @@ def test_unique_iris(self):
             # Get canonical
             canonicals = {prefix: resource.has_canonical for prefix, resource in resources.items()}
             canonical_target = [prefix for prefix, target in canonicals.items() if target is None]
-            all_targets = list({target for prefix, target in canonicals.items() if target is not None})
-            if len(canonical_target) == 1 and len(all_targets) == 1 and canonical_target[0] == all_targets[0]:
+            all_targets = list(
+                {target for prefix, target in canonicals.items() if target is not None}
+            )
+            if (
+                len(canonical_target) == 1
+                and len(all_targets) == 1
+                and canonical_target[0] == all_targets[0]
+            ):
                 continue
 
             x[iri] = parts, unmapped, canonical_target, all_targets

From 8d2fa6b0225fdbbce42331213c287b7ff6097484 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 11:16:20 +0200
Subject: [PATCH 12/14] Update tsv_export.py

---
 src/bioregistry/export/tsv_export.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/bioregistry/export/tsv_export.py b/src/bioregistry/export/tsv_export.py
index 3e1fb1fa1..0878681a3 100644
--- a/src/bioregistry/export/tsv_export.py
+++ b/src/bioregistry/export/tsv_export.py
@@ -73,6 +73,7 @@ def export_tsv():
     *METAPREFIXES,
     "part_of",
     "provides",
+    "has_canonical",
     # 'type',
 ]
 
@@ -139,6 +140,7 @@ def get_registry_rows():
                 # '|'.join(data.get('appears_in', [])),
                 data.part_of,
                 data.provides,
+                data.has_canonical,
                 # data.get('type'),
                 # TODO could add more, especially mappings
             )

From 53e8ca6f1c2f489f5d8cf244a313019dd913c671 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 11:16:46 +0200
Subject: [PATCH 13/14] Update upload_ndex.py

---
 src/bioregistry/upload_ndex.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/bioregistry/upload_ndex.py b/src/bioregistry/upload_ndex.py
index cccdcef9d..0ec18afe3 100644
--- a/src/bioregistry/upload_ndex.py
+++ b/src/bioregistry/upload_ndex.py
@@ -53,6 +53,12 @@ def upload():
                 target=resource_nodes[target],
                 interaction="provides",
             )
+        if entry.has_canonical:
+            cx.add_edge(
+                source=resource_nodes[prefix],
+                target=resource_nodes[entry.has_canonical],
+                interaction="has_canonical",
+            )
 
         # Which registries does it map to?
         for metaprefix in metaregistry:

From d7546ccb12e0dd607a1c18a85e85c84371d3e0cd Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Fri, 24 Sep 2021 11:17:17 +0200
Subject: [PATCH 14/14] Update test_data.py

---
 tests/test_data.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/test_data.py b/tests/test_data.py
index 7e792b111..252a0aaad 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -294,6 +294,22 @@ def test_get_rdf(self):
         s = resource_to_rdf_str("chebi")
         self.assertIsInstance(s, str)
 
+    def test_provides(self):
+        """Make sure all provides relations point to valid prefixes."""
+        for prefix, resource in self.registry.items():
+            if resource.provides is None:
+                continue
+            with self.subTest(prefix=prefix):
+                self.assertIn(resource.provides, self.registry)
+
+    def test_has_canonical(self):
+        """Make sure all has_canonical relations point to valid prefixes."""
+        for prefix, resource in self.registry.items():
+            if resource.has_canonical is None:
+                continue
+            with self.subTest(prefix=prefix):
+                self.assertIn(resource.has_canonical, self.registry)
+
     def test_unique_iris(self):
         """Test that all IRIs are unique, or at least there's a mapping to which one is the preferred prefix."""
         prefix_map = bioregistry.get_format_urls()