mapping-commons · yarikoptic · Feb 6, 2025 · Feb 6, 2025 · Feb 6, 2025 · Feb 6, 2025
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/project/jsonld/sssom_schema.jsonld b/project/jsonld/sssom_schema.jsonld
@@ -1844,7 +1844,7 @@
     {
       "name": "similarity_score",
       "definition_uri": "https://w3id.org/sssom/similarity_score",
-      "description": "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm.",
+      "description": "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm.",
       "from_schema": "https://w3id.org/sssom/schema/",
       "see_also": [
         "https://github.com/mapping-commons/sssom/issues/385",
@@ -2407,7 +2407,7 @@
     {
       "name": "NoTermFound",
       "definition_uri": "https://w3id.org/sssom/NoTermFound",
-      "description": "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjuction with a corresponding subject_source or object_source to signify where the term was not found.",
+      "description": "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found.",
       "from_schema": "https://w3id.org/sssom/schema/",
       "see_also": [
         "https://github.com/mapping-commons/sssom/issues/28",

diff --git a/project/jsonschema/sssom_schema.schema.json b/project/jsonschema/sssom_schema.schema.json
@@ -350,7 +350,7 @@
                     "type": "string"
                 },
                 "similarity_score": {
-                    "description": "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm.",
+                    "description": "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm.",
                     "maximum": 1.0,
                     "minimum": 0.0,
                     "type": "number"
@@ -678,7 +678,7 @@
         },
         "NoTermFound": {
             "additionalProperties": false,
-            "description": "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjuction with a corresponding subject_source or object_source to signify where the term was not found.",
+            "description": "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found.",
             "title": "NoTermFound",
             "type": "object"
         },

diff --git a/project/shacl/sssom_schema.shacl.ttl b/project/shacl/sssom_schema.shacl.ttl
@@ -239,7 +239,7 @@ sssom:MappingSet a sh:NodeShape ;
 
 sssom:NoTermFound a sh:NodeShape ;
     sh:closed true ;
-    sh:description "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjuction with a corresponding subject_source or object_source to signify where the term was not found." ;
+    sh:description "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found." ;
     sh:ignoredProperties ( rdf:type ) ;
     sh:targetClass sssom:NoTermFound .
 
@@ -411,7 +411,7 @@ owl:Axiom a sh:NodeShape ;
             sh:order 28 ;
             sh:path pav:authoredOn ],
         [ sh:datatype xsd:double ;
-            sh:description "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm." ;
+            sh:description "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm." ;
             sh:maxCount 1 ;
             sh:maxInclusive 1e+00 ;
             sh:minInclusive 0e+00 ;

diff --git a/project/sqlschema/sssom_schema.sql b/project/sqlschema/sssom_schema.sql
@@ -46,7 +46,7 @@
 --     * Slot: mapping_date Description: The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file.
 --     * Slot: publication_date Description: The date the mapping was published. This is different from the date the mapping was asserted.
 --     * Slot: confidence Description: A score between 0 and 1 to denote the confidence or probability that the match is correct, where 1 denotes total confidence.
---     * Slot: similarity_score Description: A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm.
+--     * Slot: similarity_score Description: A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm.
 --     * Slot: similarity_measure Description: The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. To make processing this field as unambiguous as possible, we recommend using wikidata CURIEs, but the type of this field is deliberately unspecified.
 --     * Slot: issue_tracker_item Description: The issue tracker item discussing this mapping.
 --     * Slot: other Description: Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data.
@@ -80,7 +80,7 @@
 -- # Class: "Propagatable" Description: "Metamodel extension class to describe slots whose value can be propagated down from the MappingSet class to the Mapping class."
 --     * Slot: id Description: 
 --     * Slot: propagated Description: Indicates whether a slot can be propagated from a mapping down to individual mappings.
--- # Class: "NoTermFound" Description: "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjuction with a corresponding subject_source or object_source to signify where the term was not found."
+-- # Class: "NoTermFound" Description: "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found."
 --     * Slot: id Description: 
 -- # Class: "mapping set_mapping_set_source" Description: ""
 --     * Slot: mapping set_id Description: Autocreated FK slot

diff --git a/pyproject.toml b/pyproject.toml
@@ -25,3 +25,10 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry.extras]
 docs = ["linkml", "mkdocs-material"]
+
+[tool.codespell]
+# Ref: https://github.com/codespell-project/codespell#using-a-config-file
+skip = '.git*,*.pdf,*.lock'
+check-hidden = true
+ignore-regex = '\b(COMENT|EHR|LOD)\b'
+ignore-words-list = 'disjointness'
diff --git a/src/CONFIG.yaml b/src/CONFIG.yaml
@@ -9,7 +9,7 @@ model_synopsis:  SSSOM     # One liner about the model
 model_description: |-      # Longer description of the model
   SSSOM
 
-# Trove Classifiers (https://pypi.org/classifiers/) -- these can be omitted if the elemens below are sufficient
+# Trove Classifiers (https://pypi.org/classifiers/) -- these can be omitted if the elements below are sufficient
 classifiers:
     - "Development Status :: 4 - Beta"
     - "Environment :: Console"

diff --git a/src/docs/5star-mappings.md b/src/docs/5star-mappings.md
@@ -25,7 +25,7 @@ to help you bring mapping production in your organization to the next level - st
     * **Optional**: record the subject and object labels to make it easier for humans to read the file
 * **Enables**:
     * Direct integration into ETL pipelines
-    * Dropping societal costs by enabling others to re-use mappings
+    * Dropping societal costs by enabling others to reuse mappings
     * Moving data between semantic spaces
 
 ## :star::star: 2-Star Mappings
@@ -38,7 +38,7 @@ uncertainty explicit, add license and select semantic mapping predicate.
     * Record a confidence value for the mapping between 0 and 1, where appropriate
     * Use a standard open license for the use of the mapping set (e.g. Creative Commons)
 * **Enables**:
-    * The worry-free re-use of mappings even if target or source terminologies are “closed”
+    * The worry-free reuse of mappings even if target or source terminologies are “closed”
     * Transparently versioned access to mappings and the opportunity to provide more direct feedback
     * Downstream users can filter for high-confidence mappings
 

diff --git a/src/docs/events/mc2021.md b/src/docs/events/mc2021.md
@@ -149,7 +149,7 @@ https://github.com/mapping-commons/sssom/issues/43
 #### Use cases for complex mappings
 
 - https://github.com/mapping-commons/sssom/issues/61
-- The main outcome for this discussion was that the particpant urged to keep the `Simple` in SSSOM, and that any decision to capture more complex mapping cases should be driven by a veruy strong use case
+- The main outcome for this discussion was that the participant urged to keep the `Simple` in SSSOM, and that any decision to capture more complex mapping cases should be driven by a very strong use case
 - For the first release of the SSSOM standard, we will not worry about complex mappings
 
 ## Next steps

diff --git a/src/docs/events/mc2023.md b/src/docs/events/mc2023.md
@@ -15,7 +15,7 @@ https://www.youtube.com/watch?v=juMQQ01Q540&list=PLqu_J7ADQtKyX55F7RqZtaSS7TwGd3
 
 - Scroll through [the documentation](https://mapping-commons.github.io/sssom/home/), in particular [the paper](https://doi.org/10.1093/database/baac035) and the [basic tutorial](https://mapping-commons.github.io/sssom/tutorial/) to get a basic sense of SSSOM
 - Skim through, or even watch, the [SSSOM CCB Seminar recording](https://www.youtube.com/watch?v=4vqeRECuAKE)
-- Scroll through the slide decks describing the problems we will be adressing (note, they are all works in progress):
+- Scroll through the slide decks describing the problems we will be addressing (note, they are all works in progress):
     - [Complex Mappings: Examples from OMOP2OBO (Tiffany Callahan)](https://docs.google.com/presentation/d/1Jn0W9gjRn19ISDB8N-sEwKwXsJySLPlNIsOL6ng_nEA/edit?usp=sharing)
     - [Complex mappings - the journey towards a proposal (Nico Matentzoglu)](https://docs.google.com/presentation/d/1kFD33S_WMgEGmCnT7IjVCeEyKI7OpcUw1ZzRXGqt1hs/edit?usp=sharing)
     - [Literal mappings with SSSOM (James McLaughlin)](https://docs.google.com/presentation/d/1mBZK6KS7JgmXlEtszQiOa_Cl7SXg_Z8wRp0tZHaL57Y/edit?usp=sharing)

diff --git a/src/docs/explanation/mappings.md b/src/docs/explanation/mappings.md
@@ -40,7 +40,7 @@ There are many important metadata elements to consider, but the ones that are by
 - _Confidence_: Do I trust the mapping? Was is done manually by an expert in my domain, or by an algorithm?
 - _Source version_: Which version of the term (or its corresponding ontology) was mapped? Is there a newer mapping which has a more suitable match for my term?
 
-Whenever you handle mappings (either create, or re-use), make sure you are keenly aware of at least these three metrics, and capture them. You may even want to consider using a proper mapping model like the [Simple Shared Standard for Ontology Mappings (SSSOM)](https://github.com/mapping-commons/SSSOM/blob/master/SSSOM.md) which will make your mappings FAIR and reusable.
+Whenever you handle mappings (either create, or reuse), make sure you are keenly aware of at least these three metrics, and capture them. You may even want to consider using a proper mapping model like the [Simple Shared Standard for Ontology Mappings (SSSOM)](https://github.com/mapping-commons/SSSOM/blob/master/SSSOM.md) which will make your mappings FAIR and reusable.
 
 ### String-string mappings
 String-string mappings are mappings that relate two strings. The task of matching two strings is ubiquitous for example in database search fields (where a user search string needs to be mapped to some strings in a database). Most, if not all effective ontology matching techniques will employ some form of string-string matching. For example, to match simple variations of labels such as "abnormal heart" and "heart abnormality", various techniques such as [Stemming](https://en.wikipedia.org/wiki/Stemming) and [bag of words](https://en.wikipedia.org/wiki/Bag-of-words_model#:~:text=The%20bag%2Dof%2Dwords%20model,word%20order%20but%20keeping%20multiplicity.) can be employed effectively. Other techniques such as edit-distance or Levenshtein can be used to quantify the similarity of two strings, which can provide useful insights into mapping candidates.

diff --git a/src/docs/faq.md b/src/docs/faq.md
@@ -6,7 +6,7 @@
 Mappings are frequently created on an ad-hoc basis, using simple two-column spreadsheets where the first column corresponds to the subject of the mapping, and the second column to the object of the mapping. This is insufficient for a variety of reasons:
 
 - non-transparent precision: While the assumption is that the subject "sort of mostly exactly" maps to the object, in practice this is rarely the case. Matches can `exact`, where the subject corresponds 100% to the object, `broad`, where the object is broader than the subject, and others. Qualifiers like `exact`, `broad`, `narrow`, `related` and `close` qualify the *precision* of the mapping (not to be mistaken for fuzziness of confidence). Without knowing the precision, we cannot accurately transform our data, nor can we use the mappings to "walk", i.e. move from one mapping to another, see [SSSOM 5-Star recommendation for mappings](spec.md#minimum)
-- non-transparent incompleteness: We don't know when the mapping was created, on the basis of what version of the terminological source of the subject or object. As time passes, we also lose confidence whether there would now be more suitable mappings, or wether there are new terms that are now fully covered by the mappings.
+- non-transparent incompleteness: We don't know when the mapping was created, on the basis of what version of the terminological source of the subject or object. As time passes, we also lose confidence whether there would now be more suitable mappings, or whether there are new terms that are now fully covered by the mappings.
 - non-transparent confidence: whether a tool or a human propose the mapping, there is always a bit of a risk the mapping call may be wrong. As consumers of the mappings we need to know how confident the mapping authors were (confidence score), and why they confident (curation rules, mapping justification).
 
 Currently, mappings are created by a variety of systems, manually curated and automatic, and we need a way to efficiently collect and combine them. Mapping sets and mappings with quality provenance metadata allow us to trace faulty mappings to the source and correct them in a way that _all_ users of the mapping set will profit from it.
@@ -43,7 +43,7 @@ facets. In that sense, it should be considered complementary, as it enables the
 something that goes beyond what most ontologies would offer. However, the concept of ontology mappings can be _perceived_ as antagonistic to Open Ontology
 principles, as its goal is _not the logical integration of knowledge, but the association or linking of terms across controlled semantic spaces_. 
 The OBO vision involves the building of a coherent, non-redundant semantic space of logically interconnected ontologies, which in particular
-wants to avoid the introduction of overlapping concepts. The mapping world specifically embraces heterogenous semantic spaces and overlapping concepts,
+wants to avoid the introduction of overlapping concepts. The mapping world specifically embraces heterogeneous semantic spaces and overlapping concepts,
 and seeks to bridge the semantic gaps using well-defined mapping relations such as "skos:broadMatch" or "owl:equivalentClass".
 
 
diff --git a/src/docs/funding.md b/src/docs/funding.md
@@ -22,9 +22,9 @@ which included, but was not limited to:
 - Implementation of validation and parsing methods in [sssom-py](https://github.com/mapping-commons/sssom-py)
 - Generating [training materials](../training.md)
 - Organising [workshops](../workshops.md)
-- Outreach activties to clinical communities such as [OHDSI](https://www.ohdsi.org/)
+- Outreach activities to clinical communities such as [OHDSI](https://www.ohdsi.org/)
 
-The grant was awared to members of the Monarch Initiative.
+The grant was awarded to members of the Monarch Initiative.
 
 ### Monarch (NIH / OD #5R24OD011883)
 
@@ -36,7 +36,7 @@ a few new features had to be supported:
 - Various improvements to the SSSOM metadata model, including the introduction of curation rules.
 - The [OxO2 SSSOM mapping browser](https://github.com/EBISPOT/oxo2)
 
-The grant was awared to members of the Monarch Initiative.
+The grant was awarded to members of the Monarch Initiative.
 
 ### Bosch Gift to LBNL
 
@@ -48,4 +48,4 @@ A lot of the work on tooling was supported by a Bosch Gift to the Lawrence Berke
 
 ### DARPA: Young Faculty Award W911NF2010255
 
-A huge amount of refactoring of [sssom-py](https://github.com/mapping-commons/sssom-py) and development best practices, as well as training materials, was provided through this grant (awared to Benjamin M. Gyori). Other contributions include work on the [Semantic mapping reasoner and assembler](https://github.com/biopragmatics/semra)
+A huge amount of refactoring of [sssom-py](https://github.com/mapping-commons/sssom-py) and development best practices, as well as training materials, was provided through this grant (awarded to Benjamin M. Gyori). Other contributions include work on the [Semantic mapping reasoner and assembler](https://github.com/biopragmatics/semra)