Skip to content

Commit

Permalink
Upgraded to version 2.0 of ORCID API
Browse files Browse the repository at this point in the history
  • Loading branch information
Justin Littman committed Sep 13, 2017
1 parent 169da58 commit 14c4c8e
Show file tree
Hide file tree
Showing 17 changed files with 4,007 additions and 8,426 deletions.
31 changes: 15 additions & 16 deletions orcid2vivo.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def should_create(self, clazz, uri):

def to_uri(self, clazz, attrs, general_clazz=None):
uri = self._identifier_strategy.to_uri(clazz, attrs, general_clazz=None)
#Need to remember vcard uri for this person so that can skip.
# Need to remember the vcard URI for this person so that it can be skipped.
if clazz == VCARD.Name and attrs.get("person_uri") == self.person_uri:
self.person_name_vcard_uri = uri
return uri
Expand All @@ -62,19 +62,19 @@ def __init__(self, identifier_strategy, create_strategy):

def crosswalk(self, orcid_id, person_uri, person_class=None, confirmed_orcid_id=False):

#Create an RDFLib Graph
# Create an RDFLib Graph
graph = Graph(namespace_manager=ns.ns_manager)

#0000-0003-3441-946X
# 0000-0003-3441-946X
clean_orcid_id = clean_orcid(orcid_id)
orcid_profile = fetch_orcid_profile(clean_orcid_id)

#Determine the class to use for the person
# Determine the class to use for the person
person_clazz = FOAF.Person
if person_class:
person_clazz = getattr(VIVO, person_class)

#ORCID
# ORCID
PersonCrosswalk._add_orcid_id(person_uri, clean_orcid_id, graph, confirmed_orcid_id)

self.bio_crosswalker.crosswalk(orcid_profile, person_uri, graph, person_class=person_clazz)
Expand All @@ -95,32 +95,31 @@ def _add_orcid_id(person_uri, orcid_id, graph, confirmed):

def fetch_orcid_profile(orcid_id):
orcid = clean_orcid(orcid_id)
#curl -H "Accept: application/orcid+json" 'http://pub.orcid.org/v1.2/0000-0003-3441-946X/orcid-profile' -L -i
r = requests.get('http://pub.orcid.org/v1.2/%s/orcid-profile' % orcid,
headers={"Accept": "application/orcid+json"})
r = requests.get('https://pub.orcid.org/v2.0/%s' % orcid,
headers={"Accept": "application/json"})
if r:
return r.json()
else:
raise Exception("Request to fetch ORCID profile for %s returned %s" % (orcid, r.status_code))


def set_namespace(namespace=None):
#Set default VIVO namespace
# Set default VIVO namespace
if namespace:
ns.D = Namespace(namespace)
ns.ns_manager.bind('d', ns.D, replace=True)


def default_execute(orcid_id, namespace=None, person_uri=None, person_id=None, skip_person=False, person_class=None,
confirmed_orcid_id=False):
#Set namespace
# Set namespace
set_namespace(namespace)

this_identifier_strategy = HashIdentifierStrategy()
this_person_uri = URIRef(person_uri) if person_uri \
else this_identifier_strategy.to_uri(FOAF.Person, {"id": person_id or orcid_id})

#this_create_strategy will implement both create strategy and identifier strategy
# this_create_strategy will implement both create strategy and identifier strategy
this_create_strategy = SimpleCreateEntitiesStrategy(this_identifier_strategy, skip_person=skip_person,
person_uri=this_person_uri)

Expand Down Expand Up @@ -155,25 +154,25 @@ def default_execute(orcid_id, namespace=None, person_uri=None, person_id=None, s
help="Skip adding triples declaring the person and the person's name.")
parser.add_argument("--confirmed", action="store_true", help="Mark the orcid id as confirmed.")

#Parse
# Parse
args = parser.parse_args()

#Excute with default strategies
# Execute with default strategies
(g, p, per_uri) = default_execute(args.orcid_id, namespace=args.namespace, person_uri=args.person_uri,
person_id=args.person_id, skip_person=args.skip_person,
person_class=args.person_class, confirmed_orcid_id=args.confirmed)

#Write to file
# Write to file
if args.file:
with codecs.open(args.file, "w") as out:
g.serialize(format=args.format, destination=out)

#Post to SPARQL Update
# Post to SPARQL Update
if args.endpoint:
if not args.username or not args.password:
raise Exception("If an endpoint is specified, --username and --password must be provided.")
sparql_insert(g, args.endpoint, args.username, args.password)

#If not writing to file to posting to SPARQL Update then serialize to stdout
# If not writing to a file or posting to SPARQL Update, then serialize to stdout
if not args.file and not args.endpoint:
print g.serialize(format=args.format)
54 changes: 26 additions & 28 deletions orcid2vivo_app/affiliations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,78 +6,76 @@
import orcid2vivo_app.vivo_namespace as ns


class AffiliationsCrosswalk():
class AffiliationsCrosswalk:
def __init__(self, identifier_strategy, create_strategy):
self.identifier_strategy = identifier_strategy
self.create_strategy = create_strategy

def crosswalk(self, orcid_profile, person_uri, graph):
#Education
for affiliation in ((orcid_profile["orcid-profile"].get("orcid-activities") or {})
.get("affiliations", {}) or {}).get("affiliation", []):
if affiliation["type"] == "EDUCATION":
#Gather some values
degree_name = affiliation.get("role-title")
organization_name=affiliation["organization"]["name"]
start_date_year = (affiliation["start-date"] or {}).get("year", {}).get("value")
end_date_year = (affiliation["end-date"] or {}).get("year", {}).get("value")
# Education
if "educations" in orcid_profile["activities-summary"]:
for education in orcid_profile["activities-summary"]["educations"]["education-summary"]:
# Gather some values
degree_name = education.get("role-title")
organization_name = education["organization"]["name"]
start_date_year = (education["start-date"] or {}).get("year", {}).get("value")
end_date_year = (education["end-date"] or {}).get("year", {}).get("value")

#Organization
# Organization
organization_uri = self.identifier_strategy.to_uri(FOAF.Organization, {"name": organization_name})
if self.create_strategy.should_create(FOAF.Organization, organization_uri):
graph.add((organization_uri, RDF.type, FOAF.Organization))
graph.add((organization_uri, RDFS.label, Literal(organization_name)))
if "address" in affiliation["organization"]:
city = affiliation["organization"]["address"]["city"]
state = affiliation["organization"]["address"]["region"]
if "address" in education["organization"]:
city = education["organization"]["address"]["city"]
state = education["organization"]["address"]["region"]
address_uri = ns.D[to_hash_identifier("geo", (city, state))]
graph.add((address_uri, RDF.type, VIVO.GeographicLocation))
graph.add((organization_uri, OBO.RO_0001025, address_uri))
graph.add((address_uri, RDFS.label, Literal("%s, %s" % (city, state))))

#Output of educational process
# Output of educational process
educational_process_uri = self.identifier_strategy.to_uri(VIVO.EducationalProcess,
{"organization_name": organization_name,
"degree_name": degree_name,
"start_year": start_date_year,
"end_year": end_date_year})
graph.add((educational_process_uri, RDF.type, VIVO.EducationalProcess))
#Has participants
# Has participants
graph.add((educational_process_uri, OBO.RO_0000057, organization_uri))
graph.add((educational_process_uri, OBO.RO_0000057, person_uri))
#Department
if affiliation.get("department-name"):
# Department
if education.get("department-name"):
graph.add((educational_process_uri, VIVO.departmentOrSchool,
Literal(affiliation["department-name"])))
Literal(education["department-name"])))

#Interval
# Interval
add_date_interval(educational_process_uri, graph, self.identifier_strategy,
add_date(start_date_year, graph, self.identifier_strategy),
add_date(end_date_year, graph, self.identifier_strategy))

if "role-title" in affiliation:
degree_name = affiliation["role-title"]
if "role-title" in education:
degree_name = education["role-title"]

#Awarded degree
# Awarded degree
awarded_degree_uri = self.identifier_strategy.to_uri(VIVO.AwardedDegree,
{"educational_process_uri":
educational_process_uri})
graph.add((awarded_degree_uri, RDF.type, VIVO.AwardedDegree))
graph.add((awarded_degree_uri, RDFS.label, Literal(degree_name)))

#Assigned by organization
# Assigned by organization
graph.add((awarded_degree_uri, VIVO.assignedBy, organization_uri))

#Related to educational process
# Related to educational process
graph.add((awarded_degree_uri, OBO.RO_0002353, educational_process_uri))

#Relates to degree
# Relates to degree
degree_uri = self.identifier_strategy.to_uri(VIVO.AcademicDegree, {"name": degree_name})
graph.add((awarded_degree_uri, VIVO.relates, degree_uri))
if self.create_strategy.should_create(VIVO.AcademicDegree, degree_uri):
graph.add((degree_uri, RDF.type, VIVO.AcademicDegree))
graph.add((degree_uri, RDFS.label, Literal(degree_name)))

#Relates to person
# Relates to person
graph.add((awarded_degree_uri, VIVO.relates, person_uri))

107 changes: 57 additions & 50 deletions orcid2vivo_app/bio.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,59 +4,66 @@
from vivo_namespace import VCARD, OBO, FOAF


class BioCrosswalk():
class BioCrosswalk:
def __init__(self, identifier_strategy, create_strategy):
self.identifier_strategy = identifier_strategy
self.create_strategy = create_strategy

def crosswalk(self, orcid_profile, person_uri, graph, person_class=FOAF.Person):

#Get names (for person and name vcard)
person_details = orcid_profile["orcid-profile"]["orcid-bio"].get("personal-details", {})
given_names = person_details.get("given-names", {}).get("value")
family_name = person_details.get("family-name", {}).get("value")
full_name = join_if_not_empty((given_names, family_name))
# Get names (for person and name vcard)
given_names = None
family_name = None
if "name" in orcid_profile["person"]:
person_details = orcid_profile["person"]["name"]
given_names = person_details.get("given-names", {}).get("value")
family_name = person_details.get("family-name", {}).get("value")
full_name = join_if_not_empty((given_names, family_name))

#Following is non-vcard bio information
# Following is non-vcard bio information

#If skip_person, then don't create person and add names
if full_name and self.create_strategy.should_create(person_class, person_uri):
#Add person
graph.add((person_uri, RDF.type, person_class))
graph.add((person_uri, RDFS.label, Literal(full_name)))
# If skip_person, then do not create the person or add names
if full_name and self.create_strategy.should_create(person_class, person_uri):
# Add person
graph.add((person_uri, RDF.type, person_class))
graph.add((person_uri, RDFS.label, Literal(full_name)))

#Biography
biography = (orcid_profile["orcid-profile"]["orcid-bio"].get("biography") or {}).get("value")
if biography:
graph.add((person_uri, VIVO.overview, Literal(biography)))
# Biography
if "biography" in orcid_profile["person"]:
biography = orcid_profile["person"]["biography"]["content"]
if biography:
graph.add((person_uri, VIVO.overview, Literal(biography)))

#Other identifiers
#Default VIVO-ISF only supports a limited number of identifier types.
external_identifiers = \
(orcid_profile["orcid-profile"]["orcid-bio"].get("external-identifiers", {}) or {}).get("external-identifier", [])
for external_identifier in external_identifiers:
#Scopus ID
if external_identifier["external-id-common-name"]["value"] == "Scopus Author ID":
graph.add((person_uri, VIVO.scopusId, Literal(external_identifier["external-id-reference"]["value"])))
# Other identifiers
# Default VIVO-ISF only supports a limited number of identifier types.
if "external-identifiers" in orcid_profile["person"]:
external_identifiers = orcid_profile["person"]["external-identifiers"]["external-identifier"]
for external_identifier in external_identifiers:
# Scopus ID
if external_identifier["external-id-type"] == "Scopus Author ID":
graph.add((person_uri, VIVO.scopusId, Literal(external_identifier["external-id-value"])))

#ISI Research ID
if external_identifier["external-id-common-name"]["value"] == "ResearcherID":
graph.add((person_uri, VIVO.researcherId, Literal(external_identifier["external-id-reference"]["value"])))
# ISI Research ID
if external_identifier["external-id-type"] == "ResearcherID":
graph.add((person_uri, VIVO.researcherId, Literal(external_identifier["external-id-value"])))

#Keywords
keywords = \
(orcid_profile["orcid-profile"]["orcid-bio"].get("keywords", {}) or {}).get("keyword", [])
for keyword in keywords:
graph.add((person_uri, VIVO.freetextKeyword, Literal(keyword["value"])))
# Keywords
if "keywords" in orcid_profile["person"]:
keywords = orcid_profile["person"]["keywords"]["keyword"]
for keyword in keywords:
keywords_content = keyword["content"]
if keywords_content:
for keyword_content in keywords_content.split(", "):
graph.add((person_uri, VIVO.freetextKeyword, Literal(keyword_content)))

#Following is vcard bio information
# Following is vcard bio information

#Add main vcard
# Add main vcard
vcard_uri = self.identifier_strategy.to_uri(VCARD.Individual, {"person_uri": person_uri})
#Will only add vcard if there is a child vcard
# Will only add vcard if there is a child vcard
add_main_vcard = False

#Name vcard
# Name vcard
vcard_name_uri = self.identifier_strategy.to_uri(VCARD.Name, {"person_uri": person_uri})
if (given_names or family_name) and self.create_strategy.should_create(VCARD.Name, vcard_name_uri):
graph.add((vcard_name_uri, RDF.type, VCARD.Name))
Expand All @@ -67,21 +74,21 @@ def crosswalk(self, orcid_profile, person_uri, graph, person_class=FOAF.Person):
graph.add((vcard_name_uri, VCARD.familyName, Literal(family_name)))
add_main_vcard = True

#Websites
researcher_urls = \
(orcid_profile["orcid-profile"]["orcid-bio"].get("researcher-urls", {}) or {}).get("researcher-url", [])
for researcher_url in researcher_urls:
url = researcher_url["url"]["value"]
url_name = (researcher_url["url-name"] or {}).get("value")
vcard_website_uri = self.identifier_strategy.to_uri(VCARD.URL, {"url": url})
graph.add((vcard_website_uri, RDF.type, VCARD.URL))
graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri))
graph.add((vcard_website_uri, VCARD.url, Literal(url, datatype=XSD.anyURI)))
if url_name:
graph.add((vcard_website_uri, RDFS.label, Literal(url_name)))
add_main_vcard = True
# Websites
if "researcher-urls" in orcid_profile["person"]:
researcher_urls = orcid_profile["person"]["researcher-urls"]["researcher-url"]
for researcher_url in researcher_urls:
url = researcher_url["url"]["value"]
url_name = researcher_url["url-name"]
vcard_website_uri = self.identifier_strategy.to_uri(VCARD.URL, {"url": url})
graph.add((vcard_website_uri, RDF.type, VCARD.URL))
graph.add((vcard_uri, VCARD.hasURL, vcard_website_uri))
graph.add((vcard_website_uri, VCARD.url, Literal(url, datatype=XSD.anyURI)))
if url_name:
graph.add((vcard_website_uri, RDFS.label, Literal(url_name)))
add_main_vcard = True

if add_main_vcard and self.create_strategy.should_create(VCARD.Individual, vcard_uri):
graph.add((vcard_uri, RDF.type, VCARD.Individual))
#Contact info for
# Contact info for
graph.add((vcard_uri, OBO.ARG_2000029, person_uri))
Loading

0 comments on commit 14c4c8e

Please sign in to comment.