Skip to content

Commit

Permalink
Merge branch 'main' of github.com:caltechlibrary/irdmtools into gh-pages
Browse files Browse the repository at this point in the history
  • Loading branch information
rsdoiel committed Feb 22, 2024
2 parents ddb2422 + 83a4cc9 commit 6caedf9
Show file tree
Hide file tree
Showing 38 changed files with 1,468 additions and 272 deletions.
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ authors:


repository-code: "https://github.com/caltechlibrary/irdmtools"
version: 0.0.68
version: 0.0.70
license-url: "https://caltechlibrary.github.io/irdmtools/LICENSE"
keywords: [ "institutional repository", "data
management", "Invenio", "Invenio-RDM" ]
6 changes: 6 additions & 0 deletions TODO.html
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ <h2 id="bugs">Bugs</h2>
</ul>
<h2 id="next">Next</h2>
<ul class="task-list">
<li><label><input type="checkbox" />Integrate a YAML options file into
doi2rdm so that we can easily map our customized mappings via
configuration instead of hard coding them.</label></li>
<li><label><input type="checkbox" />irdmtools Go based commands should
not use the RDM JSON API, they should all be used to directly access the
Postgres database</label></li>
<li><label><input type="checkbox" checked="" />rdmutil get_all_ids needs
a get_all_stale_ids counterpart, see issue #68 (implemented
get_record_versions)</label></li>
Expand Down
2 changes: 2 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ Bugs
Next
----

- [ ] Integrate a YAML options file into doi2rdm so that we can easily map our customized mappings via configuration instead of hard coding them.
- [ ] irdmtools Go based commands should not use the RDM JSON API, they should all be used to directly access the Postgres database
- [x] rdmutil get_all_ids needs a get_all_stale_ids counterpart, see issue #68 (implemented get_record_versions)
- [x] add put_record to rdmutil, actually done as many steps, new_record, new_draft, update_draft, ...
- [x] Implement a CrossRef to Invenio RDM record
Expand Down
2 changes: 1 addition & 1 deletion about.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

<section>
<h1 id="about-this-software">About this software</h1>
<h2 id="irdmtools-0.0.68">irdmtools 0.0.68</h2>
<h2 id="irdmtools-0.0.70">irdmtools 0.0.70</h2>
<h3 id="authors">Authors</h3>
<ul>
<li>R. S. Doiel</li>
Expand Down
4 changes: 2 additions & 2 deletions about.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ authors:
orcid: "https://orcid.org/0000-0001-9266-5146"

repository-code: "https://github.com/caltechlibrary/irdmtools"
version: 0.0.68
version: 0.0.70
license-url: "https://caltechlibrary.github.io/irdmtools/LICENSE"
keywords: [ "institutional repository", "data
management", "Invenio", "Invenio-RDM" ]
Expand All @@ -24,7 +24,7 @@ management", "Invenio", "Invenio-RDM" ]
About this software
===================

## irdmtools 0.0.68
## irdmtools 0.0.70

### Authors

Expand Down
85 changes: 36 additions & 49 deletions cmd/doi2rdm/doi2rdm.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ var (
# SYNOPSIS
{app_name} [OPTIONS] DOI
{app_name} [OPTIONS] [OPTIONS_YAML] DOI
# DESCRIPTION
Expand All @@ -64,6 +64,15 @@ suitable for import into Invenio RDM. The DOI can be in either their
canonical form or URL form (e.g. "10.1021/acsami.7b15651" or
"https://doi.org/10.1021/acsami.7b15651").
# OPTIONS_YAML
{app_name} can use an YAML options file to set the behavior of the
crosswalk from CrossRef to RDM. This replaces many of the options
previously required in prior implementations of this tool. See all the
default options setting use the `+"`"+`-show-yaml`+"`"+` command line
options. You can save this to disk, modify it, then use them for
migrating content from CrossRef to RDM.
# OPTIONS
-help
Expand All @@ -78,38 +87,31 @@ canonical form or URL form (e.g. "10.1021/acsami.7b15651" or
-diff JSON_FILENAME
: compare the JSON_FILENAME contents with record generated from CrossRef works record
-dot-initials
: Add period to initials in given name
-download
: attempt to download the digital object if object URL provided
-mailto
: (string) set the mailto value for CrossRef API access (default "[email protected]")
-resource-map
: Use this two column CSV file (no header row) to map resource types in CrossRef to RDM
-contributor-map
: Use this two column CSV file (no header row) to map contributor types from CrossRef (e.g.
"author", "translator", "editor", "chair") to RDM roles.
-show-yaml
: This will display the default YAML configuration file. You can save this and customize to suit your needs.
# EXAMPLES
Example generating a JSON document for a single DOI. The resulting
text file is called "article.json".
text file is called "article.json". In this example "options.yaml"
is the configuration file for setup for your RDM instance.
~~~
{app_name} "10.1021/acsami.7b15651" >article.json
{app_name} options.yaml "10.1021/acsami.7b15651" >article.json
~~~
Check to see the difference from the saved "article.json" and
the current metadata retrieved from CrossRef.
~~~
{app_name} -diff article.json "10.1021/acsami.7b15651
{app_name} -diff article.json doi2rdm.yaml "10.1021/acsami.7b15651
~~~
Save the default YAML options to a file.
~~~
{app_name} -show-yaml >options.yaml
~~~
`
)

Expand All @@ -123,19 +125,13 @@ func main() {
fmtHelp := irdmtools.FmtHelp

showHelp, showVersion, showLicense := false, false, false
debug, downloadDocument := false, false
dotInitials := false
mailTo, diffFName := "", ""
resourceTypeFName, contributorTypeFName := "", ""
debug, showYAML := false, false
diffFName := ""
flag.BoolVar(&showHelp, "help", false, "display help")
flag.BoolVar(&showVersion, "version", false, "display version")
flag.BoolVar(&showLicense, "license", false, "display license")
flag.BoolVar(&showYAML, "show-yaml", false, "display the YAML configuration")
flag.StringVar(&diffFName, "diff", diffFName, "compare the JSON file with the current record generated from CrossRef")
flag.BoolVar(&dotInitials, "dot-initials", dotInitials, "Add period to initials in given name")
flag.BoolVar(&downloadDocument, "download", downloadDocument, "attempt to download the digital object if object URL provided")
flag.StringVar(&mailTo, "mailto", mailTo, "set the mail to value for CrossRef API access")
flag.StringVar(&resourceTypeFName, "resource-map", resourceTypeFName, "Use this CSV to map resource types from CrossREF to RDM")
flag.StringVar(&contributorTypeFName, "contributor-map", contributorTypeFName, "Use this CSV to map contributor types from CrossREF to RDM")
flag.BoolVar(&debug, "debug", debug, "display additional info to stderr")
flag.Parse()
args := flag.Args()
Expand All @@ -153,39 +149,30 @@ func main() {
fmt.Fprintf(os.Stdout, "%s\n", irdmtools.LicenseText)
os.Exit(0)
}
if showYAML {
fmt.Fprintf(os.Stdout, "%s\n", irdmtools.DefaultDoi2RdmOptionsYAML)
os.Exit(0)
}
// Create a appity object
app := new(irdmtools.Doi2Rdm)
app.Cfg = new(irdmtools.Config)
options := map[string]string{}
if resourceTypeFName != "" {
options["resource-map"] = resourceTypeFName
}
if contributorTypeFName != "" {
options["contributor-map"] = contributorTypeFName
}
if diffFName != "" {
options["diff"] = diffFName
}
if dotInitials {
options["dot_initials"] = "true"
}
if downloadDocument {
options["download_document"] = "true"
}
if mailTo != "" {
options["mailto"] = mailTo
}
if debug {
app.Cfg.Debug = true
} else {
app.Cfg.Debug = false
}

if len(args) != 1 {
if len(args) < 1 {
fmt.Fprintln(os.Stderr, "expected a single DOI on the command line")
os.Exit(1)
}
if err := app.Run(os.Stdin, os.Stdout, os.Stderr, options, args[0]); err != nil {
optionsFName, doi := "", ""
if len(args) > 1 {
optionsFName, doi = args[0], args[1]
} else {
doi = args[0]
}
if err := app.Run(os.Stdin, os.Stdout, os.Stderr, optionsFName, doi, diffFName); err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1)
}
Expand Down
4 changes: 2 additions & 2 deletions codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
"license": "https://caltechlibrary.github.io/irdmtools/LICENSE",
"codeRepository": "https://github.com/caltechlibrary/irdmtools",
"dateCreated": "2022-10-27",
"dateRelease": "2024-01-16",
"dateRelease": "2024-01-24",
"issueTracker": "https://github.com/caltechlibrary/irdmtools/issues",
"name": "irdmtools",
"version": "0.0.68",
"version": "0.0.70",
"description": "Tools for working with institutional repositories and data management systems. Current implementation targets Invenio-RDM.",
"applicationCategory": "library science",
"releaseNotes": "Proof of concept and refinements, piloting in production",
Expand Down
47 changes: 26 additions & 21 deletions crossref.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ import (
"github.com/caltechlibrary/simplified"
)

func QueryCrossRefWork(cfg *Config, doi string, mailTo string, dotInitials bool, downloadDocument bool) (*crossrefapi.Works, error) {
func QueryCrossRefWork(cfg *Config, doi string, options *Doi2RdmOptions) (*crossrefapi.Works, error) {
appName := path.Base(os.Args[0])
client, err := crossrefapi.NewCrossRefClient(appName, mailTo)
client, err := crossrefapi.NewCrossRefClient(appName, options.MailTo)
if err != nil {
return nil, err
}
Expand All @@ -29,23 +29,6 @@ func QueryCrossRefWork(cfg *Config, doi string, mailTo string, dotInitials bool,
return works, nil
}

// normalizeCrossRefType translates a CrossRef content type into the
// resource type name used on the repository side, e.g.
// "journal-article" becomes "publication-article". The comparison is
// case-insensitive; any type without a mapping is handed back exactly
// as it was received (original casing included).
func normalizeCrossRefType(s string) string {
	//FIXME: Ideally this should take a resource type map.
	//
	// NOTE: "proceedings-article" is intentionally left unmapped. Mapping
	// it to "publication-section" may not be correct; it was book_section
	// in EPrints CaltechAUTHORS.
	lowered := strings.ToLower(s)
	if lowered == "journal-article" {
		return "publication-article"
	}
	if lowered == "book-chapter" {
		return "publication-section"
	}
	return s
}

// getResourceType retrives the resource type from works.message.type
// runs normalize
func getResourceType(work *crossrefapi.Works) string {
Expand Down Expand Up @@ -468,17 +451,37 @@ func getApproved(work *crossrefapi.Works) *simplified.DateType {
return nil
}

// normalizePublisherName returns the preferred publisher name for a work
// when the options supply one; otherwise it returns val unchanged.
//
// Lookup order:
//  1. the prefix of the work's DOI, looked up in options.DoiPrefixPublishers
//  2. each non-empty ISSN of the work, looked up in options.ISSNPublishers
//
// The first match wins.
func normalizePublisherName(val string, work *crossrefapi.Works, options *Doi2RdmOptions) string {
	if doi := getDOI(work); doi != "" {
		// The prefix has to be derived from the work's DOI. Previously it
		// was computed from val (the name being normalized), so the prefix
		// table could never be matched as intended.
		// The second return value of DoiPrefix is discarded, as before; an
		// empty prefix is simply never looked up.
		if doiPrefix, _ := DoiPrefix(doi); doiPrefix != "" {
			if value, ok := options.DoiPrefixPublishers[doiPrefix]; ok {
				return value
			}
		}
	}
	for _, issn := range getISSNs(work) {
		if issn == "" {
			continue
		}
		if value, ok := options.ISSNPublishers[issn]; ok {
			return value
		}
	}
	// No preferred name configured for this work.
	return val
}

// CrosswalkCrossRefWork takes a Works object from the CrossRef API
// and maps the fields into an simplified Record struct return a
// new struct or error.
func CrosswalkCrossRefWork(cfg *Config, work *crossrefapi.Works, resourceTypeMap map[string]string, contributorTypeMap map[string]string) (*simplified.Record, error) {
func CrosswalkCrossRefWork(cfg *Config, work *crossrefapi.Works, options *Doi2RdmOptions) (*simplified.Record, error) {
if work == nil {
return nil, fmt.Errorf("crossref api works not populated")
}
rec := new(simplified.Record)
// .message.type -> .record.metadata.resource_type (via controlled vocabulary)
if value := getResourceType(work); value != "" {
if err := SetResourceType(rec, value, resourceTypeMap); err != nil {
if err := SetResourceType(rec, value, options.ResourceTypes); err != nil {
return nil, err
}
}
Expand Down Expand Up @@ -517,6 +520,8 @@ func CrosswalkCrossRefWork(cfg *Config, work *crossrefapi.Works, resourceTypeMap
}
}
if value := getPublisher(work); value != "" {
// FIXME: Setting the publisher name is going to be normalized via DOI prefix, maybe ISSN?
value := normalizePublisherName(value, work, options)
if err := SetPublisher(rec, value); err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit 6caedf9

Please sign in to comment.