Skip to content

Commit

Permalink
Make installable
Browse files Browse the repository at this point in the history
  • Loading branch information
tmorrell committed Oct 5, 2018
1 parent b598132 commit 554a388
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 10 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
/*.xml
pw
build/
dist/
epxml_to_datacite.egg-info/
23 changes: 13 additions & 10 deletions caltech_thesis.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
import xmltodict
from datacite import DataCiteMDSClient,schema40
import glob,json,datetime,re,argparse,subprocess
import glob,json,datetime,re
import os,argparse,subprocess

def cleanhtml(raw_html):
    """Return *raw_html* with every ``<...>`` tag removed.

    Only the markup is stripped; the text between tags is kept as-is and
    character entities (e.g. ``&amp;``) are not decoded.

    Args:
        raw_html: String that may contain HTML/XML tags.

    Returns:
        The input string with all tag spans deleted.
    """
    # DOTALL lets '.' match newlines so that a tag whose attributes wrap
    # onto a following line is still removed as a single tag.
    return re.sub(r'<.*?>', '', raw_html, flags=re.DOTALL)

def epxml_to_datacite(eprint,thesis_subjects):
def epxml_to_datacite(eprint):

#Parse subjects file to create dictionary of Eprints keys and labels
ref_file = os.path.join(os.path.dirname(__file__),'thesis-subjects.txt')
infile = open(ref_file,'r')
thesis_subjects = {}
for line in infile:
split = line.split(':')
thesis_subjects[split[0]]=split[1]

metadata = {}

#Transforming Metadata
Expand Down Expand Up @@ -142,13 +152,6 @@ def epxml_to_datacite(eprint,thesis_subjects):
parser.add_argument('-test', action='store_true', help='Only register test DOI')
args = parser.parse_args()

#Parse subjects file to create dictionary of Eprints keys and labels
infile = open('thesis-subjects.txt','r')
thesis_subjects = {}
for line in infile:
split = line.split(':')
thesis_subjects[split[0]]=split[1]

files = glob.glob('*.xml')
for f in files:
if 'datacite' not in f:
Expand All @@ -159,7 +162,7 @@ def epxml_to_datacite(eprint,thesis_subjects):
eprint = xmltodict.parse(fd.read())['eprints']['eprint']
print(eprint['title'])

metadata = epxml_to_datacite(eprint,thesis_subjects)
metadata = epxml_to_datacite(eprint)

#Validation fails on Windows
#assert schema40.validate(metadata)
Expand Down
11 changes: 11 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from setuptools import setup
# Distribution metadata for the epxml_to_datacite project.
setup(
name = 'epxml_to_datacite',
version ='0.9',
# Installs the single top-level module caltech_thesis.py.
py_modules = ["caltech_thesis"],
# NOTE(review): caltech_thesis.py opens thesis-subjects.txt from the
# directory of the module file (os.path.dirname(__file__)). Presumably a
# data_files target of '.' is resolved against the install prefix rather
# than the module's directory - TODO confirm the file is found after
# installation.
data_files=[('.',['thesis-subjects.txt'])],
# Third-party packages imported by caltech_thesis.py.
install_requires=[
'xmltodict',
'datacite'
]
)

0 comments on commit 554a388

Please sign in to comment.