Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SHARE-777][Feature][HOLD] Add tind.io harvester for AgEcon #667

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Restore edu.ageconsearch transformer, for old data
aaxelb committed May 25, 2017
commit 3ca26e12f8207fe562600fa0af7a1439897eddfe
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
'com.peerj.xml = share.transformers.com_peerj_xml:PeerJXMLTransformer',
'com.researchregistry = share.transformers.com_researchregistry:RRTransformer',
'com.springer = share.transformers.com_springer:SpringerTransformer',
'edu.ageconsearch = share.transformers.edu_ageconsearch:AgeconTransformer',
'edu.gwu = share.transformers.edu_gwu:GWScholarSpaceTransformer',
'edu.harvarddataverse = share.transformers.edu_harvarddataverse:HarvardTransformer',
'gov.clinicaltrials = share.transformers.gov_clinicaltrials:ClinicalTrialsTransformer',
7 changes: 7 additions & 0 deletions share/sources/edu.ageconsearch/source.yaml
Original file line number Diff line number Diff line change
@@ -13,6 +13,13 @@ configs:
transformer: mods
transformer_kwargs:
emitted_type: Preprint
- base_url: http://ageconsearch.umn.edu/browse-date
disabled: true
earliest_date: null
harvester: null
label: edu.ageconsearch
transformer: edu.ageconsearch
transformer_kwargs: {}
home_page: http://ageconsearch.umn.edu/
long_title: AgEcon Search
name: edu.ageconsearch
138 changes: 138 additions & 0 deletions share/transformers/edu_ageconsearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import re

from share.transform.chain import *


class WorkIdentifier(Parser):
    """Parser for a single identifier attached to a work."""

    # The incoming context value is passed through IRI() to produce the uri.
    # NOTE(review): IRI comes from share.transform.chain; presumably it
    # normalizes/validates the value -- confirm.
    uri = IRI(ctx)


class AgentIdentifier(Parser):
    """Parser for a single identifier attached to an agent."""

    # The context value is used as-is: Agent.identifiers already wraps the
    # email in IRI() before delegating to this parser.
    uri = ctx


class Agent(Parser):
    """Parser for an agent given a context with `name` and, optionally, `email`."""

    # The concrete agent type is picked by GuessAgentType from the name.
    schema = GuessAgentType(ctx.name)
    name = ctx.name
    # `email` is only present when an address was found for this agent, hence
    # the Try wrapper.
    # NOTE(review): presumably a missing email results in no identifiers --
    # confirm Try/Map semantics in share.transform.chain.
    identifiers = Map(Delegate(AgentIdentifier), Try(IRI(ctx.email)))


class ContributorRelation(Parser):
    """Relation linking a work to a contributing agent (schema 'Contributor')."""

    schema = 'Contributor'

    # The whole context (name plus optional email) is handed to Agent.
    agent = Delegate(Agent, ctx)
    # The agent's name as given in the source record.
    cited_as = ctx.name


class CreatorRelation(ContributorRelation):
    """Relation linking a work to a creator; extends ContributorRelation with ordering."""

    schema = 'Creator'

    # NOTE(review): ctx('index') presumably resolves to this item's position
    # in the list being mapped over -- confirm in share.transform.chain.
    order_cited = ctx('index')


class AffiliatedAgent(Parser):
    """Parser for an affiliated agent whose context is just its name."""

    # The agent type is guessed from the name, with 'organization' passed as
    # the default.
    schema = GuessAgentType(ctx, default='organization')
    name = ctx


class AgentWorkRelation(Parser):
    """Relation linking a work to an affiliated agent."""

    # The context (a name) is delegated to AffiliatedAgent.
    agent = Delegate(AffiliatedAgent, ctx)


class Tag(Parser):
    """Parser for a tag; the context value itself is the tag name."""

    name = ctx


class ThroughTags(Parser):
    """Through-relation connecting a work to one Tag."""

    # The context value is delegated to Tag.
    tag = Delegate(Tag, ctx)


class Subject(Parser):
    """Parser for a subject; the context value itself is the subject name."""

    name = ctx


class ThroughSubjects(Parser):
    """Through-relation connecting a work to one Subject."""

    # The context value is delegated to Subject.
    subject = Delegate(Subject, ctx)


class Preprint(Parser):
    """Root parser for a single AgEcon Search record.

    Maps the record's fields onto a preprint: title, abstract, identifiers,
    creators and contributors (with emails when available), the affiliated
    institution, keyword tags, JEL-code subjects and the publication date.
    A selection of raw source fields is kept under ``Extra``.
    """

    title = Try(ctx.title)
    description = Try(ctx.abstract)
    # NOTE(review): primary_identifier and uri are not wrapped in Try, unlike
    # `identifiers` -- presumably they are always present; confirm.
    identifiers = Concat(
        Map(Delegate(WorkIdentifier), ctx.primary_identifier),
        Map(Delegate(WorkIdentifier), ctx.uri),
        Map(Delegate(WorkIdentifier), Try(ctx.identifiers)),
    )

    related_agents = Concat(
        # Authors become creators, editors become contributors; both are
        # first paired with their emails by get_agent_emails.
        Map(
            Delegate(CreatorRelation),
            RunPython('get_agent_emails', ctx, 'authors', 'authors_email')
        ),
        Map(
            Delegate(ContributorRelation),
            RunPython('get_agent_emails', ctx, 'editors', 'editors_email')
        ),
        # The institution name is extracted from the association string.
        Map(
            Delegate(AgentWorkRelation),
            RunPython('get_affiliated_organization', Try(ctx.institution_association))
        )
    )

    tags = Map(Delegate(ThroughTags), Try(ctx.keywords))
    date_published = ParseDate(Try(ctx.issue_date))
    subjects = Map(Delegate(ThroughSubjects), Subjects(Try(ctx.jel_codes)))

    class Extra:
        # Source fields carried along alongside the parsed attributes above.
        other_titles = Try(ctx.other_titles)
        notes = Try(ctx.notes)
        editors = Try(ctx.editors)
        editors_email = Try(ctx.editors_email)
        authors = Try(ctx.authors)
        authors_email = Try(ctx.authors_email)
        series_report_number = Try(ctx.series_report_number)
        institution_association = Try(ctx.institution_association)
        collections = Try(ctx.collections)
        total_pages = Try(ctx.total_pages)
        from_page = Try(ctx.from_page)
        to_page = Try(ctx.to_page)
        identifiers = Try(ctx.identifiers)
        uri = ctx.uri

    def get_agent_emails(self, ctx, agent_key, email_key):
        """Pair each agent name with its email address, when one is listed.

        Args:
            ctx: The record mapping; indexed with ``agent_key``/``email_key``.
            agent_key: Key holding the agent name or list of names.
            email_key: Key holding one entry or a list of entries formatted
                as ``'name (email)'``.

        Returns:
            A list with one dict per agent. Each dict has ``'name'`` and,
            when an entry containing that name carries a parenthesized
            address, ``'email'``.
        """

        def listify(key):
            # A missing key means "no values"; a lone value is wrapped so the
            # caller can always iterate.
            try:
                value = ctx[key]
            except KeyError:
                return []
            return value if isinstance(value, list) else [value]

        agents = listify(agent_key)
        emails = listify(email_key)
        # Compiled once, outside the loop; raw string avoids invalid-escape
        # warnings for `\(` and `\S`.
        email_pattern = re.compile(r'\((\S+?)\)')

        agent_objects = []
        for agent in agents:
            agent_object = {'name': agent}

            # First email entry that contains the agent's name (substring
            # match, so a name contained in another name may match its entry).
            agent_email = next((x for x in emails if agent in x), None)

            # An entry may mention the name without a parenthesized address;
            # in that case the agent is emitted without an email rather than
            # failing the whole record.
            match = email_pattern.search(agent_email) if agent_email else None
            if match:
                agent_object['email'] = match.group(1)
            agent_objects.append(agent_object)

        return agent_objects

    def get_affiliated_organization(self, affiliation):
        """Return the organization name from an institution association.

        Args:
            affiliation: String formatted as ``'name>volume issue etc'``, or
                a falsy value when the record has no institution association.

        Returns:
            The text before the first ``'>'``, or ``None`` when no
            affiliation was supplied.
        """
        if not affiliation:
            # The input is wrapped in Try upstream, so it can be absent.
            # NOTE(review): assumes Map skips a None result, as it appears to
            # for the other Try-wrapped fields in this class -- confirm.
            return None
        return affiliation.split('>')[0]


class AgeconTransformer(ChainTransformer):
    """Chain transformer for AgEcon Search records; parsing starts at Preprint."""

    VERSION = 1
    # Parser applied to the top-level record.
    root_parser = Preprint
3 changes: 1 addition & 2 deletions share/transformers/mods.py
Original file line number Diff line number Diff line change
@@ -240,8 +240,7 @@ class MODSCreativeWork(Parser):
lambda obj: 'invalid' not in obj,
tools.Concat(
tools.Try(ctx['mods:identifier']),
tools.Try(ctx.header['identifier']),
tools.Try(ctx['mods:location']['mods:url']),
tools.Try(ctx.header['identifier'])
)
)
)