diff --git a/.gitignore b/.gitignore
index 8c7ffd62..4107f6a2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -116,3 +116,4 @@ t.json
t.keys
t.txt
man/*
+sandbox/*
diff --git a/CITATION.cff b/CITATION.cff
index 096f235b..12bd488b 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -14,7 +14,7 @@ authors:
repository-code: "https://github.com/caltechlibrary/irdmtools"
-version: 0.0.76
+version: 0.0.78
license-url: "https://caltechlibrary.github.io/irdmtools/LICENSE"
keywords: [ "institutional repository", "data
management", "Invenio", "Invenio-RDM" ]
diff --git a/Makefile b/Makefile
index ce6b8251..bec6dad9 100644
--- a/Makefile
+++ b/Makefile
@@ -78,9 +78,9 @@ installer.sh: .FORCE
test: $(PACKAGE)
#go test -timeout 120h
go test -test.v -run Test01Config
- go test -test.v -run Test01Query
- go test -timeout 2h -ids testdata/test_record_ids.json -run Test02GetRecord
- go test -timeout 2h -ids testdata/test_record_ids.json -run Test03Harvest
+ #go test -test.v -run Test01Query
+ #go test -timeout 2h -ids testdata/test_record_ids.json -run Test02GetRecord
+ #go test -timeout 2h -ids testdata/test_record_ids.json -run Test03Harvest
go test -timeout 2h -run Test01GetRecordIds
go test -timeout 2h -run Test01GetModifiedIds
@@ -117,6 +117,7 @@ install: build
@echo ""
@echo "Make sure $(PREFIX)/bin is in your PATH"
@echo "Installing man page in $(PREFIX)/man"
+ @mkdir -p $(PREFIX)/man/man1
@for FNAME in $(MAN_PAGES); do if [ -f "./man/man1/$${FNAME}" ]; then cp -v "./man/man1/$${FNAME}" "$(PREFIX)/man/man1/$${FNAME}"; fi; done
@echo ""
@echo "Make sure $(PREFIX)/man is in your MANPATH"
diff --git a/README.md b/README.md
index 2c988380..82e1434c 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,7 @@ This tools take an EPrint record in a dataset collection and returns an abbrevia
- An Invenio RDM deployment
- To building the Go based software and documentation
- git
- - Go >= 1.20.7
+ - Go >= 1.22.1
- Make (e.g. GNU Make)
- Pandoc >= 3
- For harvesting content
diff --git a/about.html b/about.html
index 1a1c1fcf..e662565b 100644
--- a/about.html
+++ b/about.html
@@ -24,7 +24,7 @@
About this software
-
+
Authors
R. S. Doiel
@@ -60,7 +60,7 @@ Operating Systems
Software Requiremets
-Go >= 1.22
+Go >= 1.22.1
PostgreSQL >= 16
PostgREST >= 12
Pandoc >= 3
diff --git a/about.md b/about.md
index 16b28334..1f841802 100644
--- a/about.md
+++ b/about.md
@@ -14,7 +14,7 @@ authors:
orcid: "https://orcid.org/0000-0001-9266-5146"
repository-code: "https://github.com/caltechlibrary/irdmtools"
-version: 0.0.76
+version: 0.0.78
license-url: "https://caltechlibrary.github.io/irdmtools/LICENSE"
keywords: [ "institutional repository", "data
management", "Invenio", "Invenio-RDM" ]
@@ -24,7 +24,7 @@ management", "Invenio", "Invenio-RDM" ]
About this software
===================
-## irdmtools 0.0.76
+## irdmtools 0.0.78
### Authors
@@ -59,7 +59,7 @@ systems. Current implementation targets Invenio-RDM.
### Software Requiremets
-- Go >= 1.22
+- Go >= 1.22.1
- PostgreSQL >= 16
- PostgREST >= 12
- Pandoc >= 3
diff --git a/citation.go b/citation.go
index e1b4a45b..2f2fbb52 100644
--- a/citation.go
+++ b/citation.go
@@ -7,8 +7,8 @@ import (
"strings"
// Caltech Library Packages
- "github.com/caltechlibrary/simplified"
"github.com/caltechlibrary/eprinttools"
+ "github.com/caltechlibrary/simplified"
)
// irdmtools provides a means of turning an EPrint or RDM record into a datastructure suitable
@@ -139,7 +139,6 @@ type Citation struct {
// Chapters from book
Chapters string `json:"chapters,omitempty" xml:"chapters,omitempty" yaml:"chapters,omitempty"`
-
// Series/SeriesNumber values from CaltechAUTHORS (mapped from custom fields)
Series string `json:"series,omitempty" xml:"series,omitempty" yaml:"series,omitempty"`
SeriesNumber string `json:"series_number,omitempty" xml:"series_number,omitempty" yaml:"series_number,omitempty"`
@@ -151,7 +150,6 @@ type Citation struct {
// Pages
Pages string `json:"pages,omitempty" xml:"pages,omitempty" yaml:"pages,omitempty"`
-
// ThesisDegree for thesis types
ThesisDegree string `json:"thesis_degree,omitempty" xml:"thesis_degree,omitempty" yaml:"thesis_degree,omitempty"`
@@ -255,19 +253,19 @@ func (cite *Citation) CrosswalkRecord(cName string, cID string, citeUsingURL str
if rec.Files != nil {
// In RDM the "default preview" is the primary document (e.g. article, thesis, etc) of record.
defaultPreview := rec.Files.DefaultPreview
- for _, entry := range rec.Files.Entries {
+ for _, entry := range rec.Files.Entries {
// Handle the case where default preview isn't set and treat the first file as the one
// serving as a default.
- if defaultPreview == "" {
- defaultPreview = entry.Key
- }
- if defaultPreview == entry.Key {
- cite.PrimaryObject = map[string]interface{}{
- "basename": defaultPreview,
- "url": fmt.Sprintf("%s/records/%s/files/%s", repoURL, rec.ID, defaultPreview),
- }
- }
- }
+ if defaultPreview == "" {
+ defaultPreview = entry.Key
+ }
+ if defaultPreview == entry.Key {
+ cite.PrimaryObject = map[string]interface{}{
+ "basename": defaultPreview,
+ "url": fmt.Sprintf("%s/records/%s/files/%s", repoURL, rec.ID, defaultPreview),
+ }
+ }
+ }
}
// Now crosswalk the rest of the citation from the simplified record.
@@ -481,7 +479,6 @@ func (ca *CitationAgent) ToString() string {
return fmt.Sprintf("%s, %s", ca.FamilyName, ca.LivedName)
}
-
// CrosswalkEPrint takes an eprinttools.EPrint record and return maps the values into the Citation.
func (cite *Citation) CrosswalkEPrint(cName string, cID string, citeUsingURL string, eprint *eprinttools.EPrint) error {
// map repository required fields, everything else is derived from crosswalk
@@ -504,7 +501,6 @@ func (cite *Citation) CrosswalkEPrint(cName string, cID string, citeUsingURL str
}
}
-
// from the eprint table
cite.Title = eprint.Title
cite.Type = eprint.Type
@@ -526,7 +522,7 @@ func (cite *Citation) CrosswalkEPrint(cName string, cID string, citeUsingURL str
cite.ISSN = eprint.ISSN
cite.DOI = eprint.DOI
cite.PMCID = eprint.PMCID
-
+
if eprint.ThesisType != "" {
cite.ThesisType = eprint.ThesisType
}
@@ -611,9 +607,9 @@ func (cite *Citation) CrosswalkEPrint(cName string, cID string, citeUsingURL str
cite.Contributor = append(cite.Contributor, agent)
}
}
- }
+ }
- // map in Thesis Adivors
+ // map in Thesis Advisors
if eprint.ThesisAdvisor.Length() > 0 {
for i := 0; i < eprint.ThesisAdvisor.Length(); i++ {
creator := eprint.ThesisAdvisor.IndexOf(i)
diff --git a/citation_test.go b/citation_test.go
index a38a8a07..9b7e5b11 100644
--- a/citation_test.go
+++ b/citation_test.go
@@ -27,16 +27,16 @@ func TestCitationCrosswalkRecord(t *testing.T) {
}
item := &Citation{}
- if err := item.CrosswalkRecord("rdm_website", "10.5281-inveniordm.1234", "", rec); err != nil {
+ if err := item.CrosswalkRecord("rdm_website", "10.5281-inveniordm.1234", "", "", rec); err != nil {
t.Error(err)
}
expectedS := "rdm_website"
- if item.Repository != expectedS {
- t.Errorf("incorrect repository, expected %q, got %q", expectedS, item.Repository)
+ if item.Collection != expectedS {
+ t.Errorf("incorrect repository, expected %q, got %q", expectedS, item.Collection)
}
expectedS = "10.5281-inveniordm.1234"
- if item.RepositoryRecordID != expectedS {
- t.Errorf("incorrect repo. rec. id, expected %q, got %q", expectedS, item.RepositoryRecordID)
+ if item.CollectionID != expectedS {
+ t.Errorf("incorrect repo. rec. id, expected %q, got %q", expectedS, item.CollectionID)
}
expectedS = "InvenioRDM"
if item.Title != expectedS {
@@ -208,17 +208,17 @@ func TestCrosswalkCreatorToCitationAgent(t *testing.T) {
contributorList := []*simplified.Creator{
&simplified.Creator{
PersonOrOrg: &simplified.PersonOrOrg{
- Name: "Nielsen, Lars Holm",
+ Name: "Nielsen, Lars Holm",
FamilyName: "Nielsen",
- GivenName: "Lars Holm",
- Type: "person",
+ GivenName: "Lars Holm",
+ Type: "person",
Identifiers: []*simplified.Identifier{
&simplified.Identifier{
- Scheme: "orcid",
+ Scheme: "orcid",
Identifier: "0000-0001-8135-3489",
},
&simplified.Identifier{
- Scheme: "clpid",
+ Scheme: "clpid",
Identifier: "Nielsen-Lars-Holm",
},
},
@@ -228,7 +228,7 @@ func TestCrosswalkCreatorToCitationAgent(t *testing.T) {
},
Affiliations: []*simplified.Affiliation{
&simplified.Affiliation{
- ID: "01ggx415",
+ ID: "01ggx415",
Name: "CERN",
},
},
@@ -238,8 +238,8 @@ func TestCrosswalkCreatorToCitationAgent(t *testing.T) {
&CitationAgent{
FamilyName: "Nielsen",
LivedName: "Lars Holm",
- ORCID: "0000-0001-8135-3489",
- CLpid: "Nielsen-Lars-Holm",
+ ORCID: "0000-0001-8135-3489",
+ CLpid: "Nielsen-Lars-Holm",
},
}
expectedRole := "editor"
diff --git a/cmd/doi2rdm/doi2rdm.go b/cmd/doi2rdm/doi2rdm.go
index 9aca91ec..e92ed1cc 100644
--- a/cmd/doi2rdm/doi2rdm.go
+++ b/cmd/doi2rdm/doi2rdm.go
@@ -54,7 +54,7 @@ var (
# SYNOPSIS
-{app_name} [OPTIONS] [OPTIONS_YAML] crossref|datacite DOI
+{app_name} [OPTIONS] [OPTIONS_YAML] [crossref|datacite] DOI
# DESCRIPTION
@@ -92,26 +92,35 @@ migrating content from CrossRef to RDM.
# EXAMPLES
+Save the default YAML options to a file. You can customize this to match your
+vocabulary requirements in your RDM deployment.
+
+~~~
+ {app_name} -show-yaml >options.yaml
+~~~
+
Example generating a JSON document for a single DOI. The resulting
text file is called "article.json". In this example "options.yaml"
-is the configuration file for setup for your RDM instance.
+is the configuration file for setup for your RDM instance. It'll first
+check CrossRef then DataCite.
~~~
- {app_name} options.yaml crossref "10.1021/acsami.7b15651" >article.json
+ {app_name} options.yaml "10.1021/acsami.7b15651" >article.json
~~~
Check to see the difference from the saved "article.json" and
-the current metadata retrieved from CrossRef.
+the current metadata retrieved from CrossRef or DataCite.
~~~
- {app_name} -diff article.json crossref doi2rdm.yaml "10.1021/acsami.7b15651
+ {app_name} -diff article.json options.yaml "10.1021/acsami.7b15651"
~~~
-Save the default YAML options to a file.
+Example getting metadata for an arXiv record from DataCite
~~~
- {app_name} -show-yaml >options.yaml
+ {app_name} options.yaml "arXiv:2312.07215"
~~~
+
`
)
@@ -166,12 +175,15 @@ func main() {
app.Cfg.Debug = false
}
- if len(args) < 2 {
- fmt.Fprintln(eout, "expected a 'crossref' or 'datacite' and single DOI on the command line")
- os.Exit(1)
- }
optionsFName, dataSource, doi := "", "", ""
- if len(args) > 2 {
+ if len(args) < 1 {
+ fmt.Fprintln(eout, "expected a least a single DOI on the command line")
+ os.Exit(1)
+ } else if len(args) == 1 {
+ optionsFName, dataSource, doi = "", "", args[0]
+ } else if len(args) == 2 {
+ optionsFName, dataSource, doi = args[0], "", args[1]
+ } else if len(args) > 2 {
optionsFName, dataSource, doi = args[0], args[1], args[2]
} else {
dataSource, doi = args[0], args[1]
@@ -182,14 +194,15 @@ func main() {
fmt.Fprintf(eout, "%s\n", err)
os.Exit(1)
}
- /*
case "datacite":
if err := app.RunDataCiteToRdm(in, out, eout, optionsFName, doi, diffFName); err != nil {
fmt.Fprintf(eout, "%s\n", err)
os.Exit(1)
}
- */
default:
- fmt.Fprintf(eout, "%q is not supported service to retrive DOI metadata\n", dataSource)
+ if err := app.RunDoiToRdmCombined(in, out, eout, optionsFName, doi, diffFName); err != nil {
+ fmt.Fprintf(eout, "%s\n", err)
+ os.Exit(1)
+ }
}
}
diff --git a/cmd/rdm2eprint/rdm2eprint.go b/cmd/rdm2eprint/rdm2eprint.go
index 3f4a8c23..7a32e30d 100644
--- a/cmd/rdm2eprint/rdm2eprint.go
+++ b/cmd/rdm2eprint/rdm2eprint.go
@@ -95,6 +95,9 @@ specified by C_NAME.
-pipeline
: read from standard input and write crosswalk to standard out.
+-latest
+: only convert record(s) if latest version.
+
# EXAMPLE
Example generating a EPRINT JSON document from RDM would use the following
@@ -146,6 +149,7 @@ func main() {
releaseDate := irdmtools.ReleaseDate
releaseHash := irdmtools.ReleaseHash
fmtHelp := irdmtools.FmtHelp
+ latestVersions := false
showHelp, showVersion, showLicense := false, false, false
configFName, debug, asXML := "", false, false
@@ -159,6 +163,7 @@ func main() {
flag.StringVar(&idsFName, "ids", idsFName, "read ids from a file")
flag.StringVar(&cName, "harvest", cName, "harvest JSON eprint records into the dataset collection.")
flag.BoolVar(&pipeline, "pipeline", pipeline, "read from standard input, crosswalk and write to standard out")
+ flag.BoolVar(&latestVersions, "latest", latestVersions, "only convert record if the latest version")
flag.Parse()
rdmids := flag.Args()
@@ -196,20 +201,20 @@ func main() {
os.Exit(1)
}
if cName != "" {
- if err := app.RunHarvest(os.Stdin, os.Stdout, os.Stderr, cName, rdmids); err != nil {
+ if err := app.RunHarvest(os.Stdin, os.Stdout, os.Stderr, cName, rdmids, latestVersions); err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1)
}
os.Exit(0)
}
if pipeline {
- if err := app.RunPipeline(os.Stdin, os.Stdout, os.Stderr, asXML); err != nil {
+ if err := app.RunPipeline(os.Stdin, os.Stdout, os.Stderr, asXML, latestVersions); err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1)
}
os.Exit(0)
}
- if err := app.Run(os.Stdin, os.Stdout, os.Stderr, rdmids, asXML); err != nil {
+ if err := app.Run(os.Stdin, os.Stdout, os.Stderr, rdmids, asXML, latestVersions); err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1)
}
diff --git a/cmd/rdmutil/rdmutil.go b/cmd/rdmutil/rdmutil.go
index c23d73a1..df0999e6 100644
--- a/cmd/rdmutil/rdmutil.go
+++ b/cmd/rdmutil/rdmutil.go
@@ -60,19 +60,9 @@ var (
# DESCRIPTION
__{app_name}__ is way of interacting with Invenio-RDM through its Postgres
-database (or a copy of that database) or web API (JSON and OAI-PMH).
-It was inspired the RDM JSON API but was implemented as a way of quickly
-processing data without the hinderance of rate limits (needed when RDM is
-public facing). If Postgres access is configured then it'll use SQL to
-retrieve the JSON data. It still uses the JSON
-API for update operations. {app_name} uses environment variables for
-configuration. For accessing the JSON API it uses the following.
-
-RDM_URL
-: the URL of the Invenio RDM API and OAI-PMH services
-
-RDMTOK
-: the token needed to access the Invenio RDM API and OAI-PMH services
+database. It does NOT use the OAI-PMH API since that is far too slow.
+{app_name} uses environment variables for configuration. For accessing the
+JSON API it uses the following.
dataset related environment variables (i.e. for harvest action)
@@ -129,11 +119,8 @@ for id retrieval. It is rate limited. Start and end dates are inclusive
and should be specific in YYYY-MM-DD format.
get_all_ids
-: Returns a list of all repository record ids latest versions. The method uses
-OAI-PMH for id retrieval is Postgress access is not configure. That can be
-terribly slow and rate limited. A test instance took 11 minutes to retrieve
-24000 record ids. If direct Postgres access is setup it queries the database
-directly (much much faster).
+: Returns a list of all repository record ids latest versions. The method
+requires Postgres database access.
get_all_stale_ids
: Returns a list of public record ids that are NOT the latest version of the
diff --git a/codemeta.json b/codemeta.json
index dcd27406..9a752ce8 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -5,10 +5,10 @@
"codeRepository": "https://github.com/caltechlibrary/irdmtools",
"dateCreated": "2022-10-27",
"dateRelease": "2024-01-24",
- "dateModified": "2024-03-25",
+ "dateModified": "2024-04-12",
"issueTracker": "https://github.com/caltechlibrary/irdmtools/issues",
"name": "irdmtools",
- "version": "0.0.76",
+ "version": "0.0.78",
"description": "Tools for working with institutional repositories and data management systems. Current implementation targets Invenio-RDM.",
"applicationCategory": "library science",
"releaseNotes": "Proof of concept and refinements, piloting in production",
@@ -29,7 +29,7 @@
"Windows"
],
"softwareRequirements": [
- "Go >= 1.22",
+ "Go >= 1.22.1",
"PostgreSQL >= 16",
"PostgREST >= 12",
"Pandoc >= 3",
diff --git a/config.go b/config.go
index 61599113..52268eac 100644
--- a/config.go
+++ b/config.go
@@ -39,6 +39,7 @@ import (
"fmt"
"os"
"database/sql"
+ "strings"
// Caltech Library packages
"github.com/caltechlibrary/dataset/v2"
@@ -52,6 +53,7 @@ type Config struct {
// Repository Name, e.g. CaltechAUTHORS, CaltechTHESIS, CaltechDATA
RepoName string `json:"repo_name,omitempty"`
// Repository ID, e.g. caltechauthors, caltechthesis, caltechdata (usually the db name for repository)
+ // NOTE: It should also match the Postgres DB name used by RDM
RepoID string `json:"repo_id,omitempty"`
// InvenioAPI holds the URL to the InvenioAPI
InvenioAPI string `json:"rdm_url,omitempty"`
@@ -111,6 +113,43 @@ func NewConfig() *Config {
return cfg
}
+// MakeDSN returns cfg.InvenioDSN when it is already set; otherwise it builds and returns a
+// Postgres DSN from the configured DB user, password, host and RepoID. It does not modify cfg.
+func (cfg *Config) MakeDSN() string {
+ if cfg.InvenioDSN == "" {
+ parts := []string{
+ "postgres://",
+ }
+ username := []string{}
+ if cfg.InvenioDbUser != "" {
+ username = append(username, cfg.InvenioDbUser)
+ }
+ if cfg.InvenioDbPassword != "" {
+ username = append(username, cfg.InvenioDbPassword)
+ }
+ if len(username) > 0 {
+ parts = append(parts, strings.Join(username, ":") + "@")
+ } else {
+ parts = append(parts, "")
+ }
+ if cfg.InvenioDbHost != "" {
+ parts = append(parts, cfg.InvenioDbHost)
+ }
+ if cfg.RepoID != "" {
+ parts = append(parts, "/" + cfg.RepoID)
+ }
+ if strings.HasPrefix(cfg.InvenioDbHost, "localhost") {
+ parts = append(parts, "?sslmode=disable")
+ } else {
+ parts = append(parts, "?sslmode=require")
+ }
+ if len(parts) > 1 {
+ return strings.Join(parts, "")
+ }
+ }
+ return cfg.InvenioDSN
+}
+
// LoadEnv checks the environment for configuration values if not
// previusly sets them. It will apply a prefix to the expected
// environment variable names if one is provided.
@@ -173,6 +212,12 @@ func (cfg *Config) LoadEnv(prefix string) error {
if rdmDbPassword := os.Getenv(prefixVar("RDM_DB_PASSWORD", prefix)); rdmDbPassword != "" {
cfg.InvenioDbPassword = rdmDbPassword
}
+ // Build our InvenioDSN
+ if rdmDSN := os.Getenv(prefixVar("RDM_DSN", prefix)); rdmDSN != "" {
+ cfg.InvenioDSN = rdmDSN
+ } else {
+ cfg.InvenioDSN = cfg.MakeDSN()
+ }
return nil
}
@@ -210,6 +255,10 @@ func (cfg *Config) LoadConfig(configFName string) error {
if err := JSONUnmarshal(src, &cfg); err != nil {
return err
}
+ // Build our DSN if not set.
+ if cfg.InvenioDSN == "" {
+ cfg.InvenioDSN = cfg.MakeDSN()
+ }
return nil
}
diff --git a/crossref.go b/crossref.go
index adbeb1d3..3bfb6ec5 100644
--- a/crossref.go
+++ b/crossref.go
@@ -418,23 +418,29 @@ func getWorksPublishedOnline(work *crossrefapi.Works) *simplified.DateType {
func getWorksPublicationDate(work *crossrefapi.Works) string {
printDate := getWorksPublishedPrint(work)
onlineDate := getWorksPublishedOnline(work)
- if (printDate == nil || printDate.Date == "") && (onlineDate == nil || onlineDate.Date == "") {
+ acceptedDate := getWorksAccepted(work)
+ if (printDate == nil || printDate.Date == "") && (onlineDate == nil || onlineDate.Date == "") && (acceptedDate == nil || acceptedDate.Date == ""){
return ""
}
- if printDate == nil || printDate.Date == "" {
- return onlineDate.Date
+ if (printDate != nil && printDate.Date != "") && (onlineDate != nil && onlineDate.Date != "") {
+ // NOTE: If we get this far we need to compare dates' date strings.
+ // This is a naive compare it assumes the date string formats are
+ // alphabetical.
+ i := strings.Compare(printDate.Date, onlineDate.Date)
+ if i < 0 || i == 0 {
+ return printDate.Date
+ }
}
- if onlineDate == nil || onlineDate.Date == "" {
+ if printDate != nil && printDate.Date != "" {
return printDate.Date
}
- // NOTE: If we get this far we need to compare dates' date strings.
- // This is a naive compare it assumes the date string formats are
- // alphabetical.
- i := strings.Compare(printDate.Date, onlineDate.Date)
- if i < 0 || i == 0 {
- return printDate.Date
+ if onlineDate != nil && onlineDate.Date != "" {
+ return onlineDate.Date
+ }
+ if acceptedDate != nil && acceptedDate.Date != "" {
+ return acceptedDate.Date
}
- return onlineDate.Date
+ return ""
}
func getWorksAccepted(work *crossrefapi.Works) *simplified.DateType {
diff --git a/datacite.go b/datacite.go
index 0dea7173..ef706b7e 100644
--- a/datacite.go
+++ b/datacite.go
@@ -4,6 +4,7 @@ import (
"fmt"
"os"
"path"
+ "strings"
"time"
// Caltech Library Packages
@@ -17,44 +18,68 @@ func QueryDataCiteObject(cfg *Config, doi string, options *Doi2RdmOptions) (map[
if err != nil {
return nil, err
}
- objects, err := client.Works(doi)
+ objects, err := client.Dois(doi)
if err != nil {
return nil, err
}
+ if len(objects) == 0 {
+ return nil, fmt.Errorf("no data returned for %q", doi)
+ }
src, _ := JSONMarshalIndent(objects, "", " ")
if cfg.Debug {
fmt.Fprintf(os.Stderr, "objects JSON:\n\n%s\n\n", src)
}
- m := map[string]interface{}{}
- if err := JSONUnmarshal(src, &m); err != nil {
+ m := map[string]interface{}{}
+ if err := JSONUnmarshal(src, &m); err != nil {
return nil, fmt.Errorf("problem encoding/decoding DataCite object, %s", err)
- }
+ }
return m, nil
}
-// getObjectData retrieves the `.access` from the DateCite `.object`
+// getObjectData retrieves the `.data` from the DataCite `.object`
func getObjectData(object map[string]interface{}) (map[string]interface{}, bool) {
if data, ok := object["data"].(map[string]interface{}); ok {
- return data, ok
+ return data, ok
}
return nil, false
}
-func getObjectDataAttributes(data map[string]interface{}) (map[string]interface{}, bool) {
- attr, ok := data["attributes"].(map[string]interface{})
- return attr, ok
+func getObjectDataAttributes(object map[string]interface{}) (map[string]interface{}, bool) {
+ if data, ok := getObjectData(object); ok {
+ attr, ok := data["attributes"].(map[string]interface{})
+ return attr, ok
+ }
+ return nil, false
}
-// getObjectCiteProcType retrieves the `.access.types.citeproc` value if exists.
-func getObjectCiteProcType(data map[string]interface{}) string {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if types, ok := attributes["types"].(map[string]string); ok {
- if citeproc, ok := types["citeproc"]; ok {
- return citeproc
+// getObjectTitle retrieves `.data.attributes.title`, falling back to the first title in `.data.attributes.titles`
+func getObjectTitle(object map[string]interface{}) string {
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if title, ok := attrs["title"].(string); ok && (title != "") {
+ return title
+ }
+ if values, ok := attrs["titles"].([]interface{}); ok {
+ for _, val := range values {
+ m := val.(map[string]interface{})
+ if title, ok := m["title"].(string); ok {
+ return title
}
}
}
- return ""
+ }
+ return ""
+}
+
+// getObjectCiteProcType retrieves the `.data.attributes.types.citeproc` value if exists.
+func getObjectCiteProcType(object map[string]interface{}) string {
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if types, ok := attrs["types"].(map[string]string); ok {
+ if citeproc, ok := types["citeproc"]; ok {
+ return citeproc
+ }
+ }
+ }
+ return ""
}
// getObjectResourceType retrives the resource type from objects.message.type
@@ -69,57 +94,32 @@ func getObjectResourceType(object map[string]interface{}) string {
return ""
}
-// getObjcetDataTitles extracts a list of titles from a list of title objects.
-func getObjectDataTitles(data map[string]interface{}) ([]map[string]string, bool) {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if titles, ok := attributes["titles"].([]map[string]string); ok {
- return titles, ok
- }
- }
- return nil, false
-}
-
-// getObjectTitles retrieves an ordered list of titles from a DataCite Object object.
-// The zero index is the primary document title, the remaining are alternative titles.
-// If no titles are found then the slice of string will be empty.
-func getObjectTitles(object map[string]interface{}) []string {
- if data, ok := getObjectData(object); ok {
- if titleList, ok := getObjectDataTitles(data); ok {
- titles := []string {}
- for _, tObj := range titleList {
- if title, ok := tObj["title"]; ok {
- titles = append(titles, title)
+// getObjectDescription retrieves the first description (a.k.a. abstract) found in
+// `.data.attributes.descriptions` of the DataCite Object
+func getObjectDescription(object map[string]interface{}) string {
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if descriptions, ok := attrs["descriptions"]; ok {
+ for _, item := range descriptions.([]interface{}) {
+ m := item.(map[string]interface{})
+ if values, ok := m["description"]; ok {
+ return fmt.Sprintf("%s", values)
}
}
- return titles
}
}
- return []string{}
-}
-
-// getObjectAbstract retrieves the abstract from the DataCite Object
-// See example JSON
-func getObjectAbstract(object map[string]interface{}) string {
- /* abstract doesn't seem to exist in Schema
- if data, ok := getObjectData(object); ok {
- if abstract, ok := data["abstract"]; ok {
- return data.(string)
- }
- }
- */
return ""
}
// getObjectPublisher
// See example JSON
func getObjectPublisher(object map[string]interface{}) string {
- // FIXME: Need to know if publisher holds the publisher and container type holds publication based on object.Message.Type
- if data, ok := getObjectData(object); ok {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if publisher, ok := attributes["publisher"].(map[string]string); ok {
- if name, ok := publisher["name"]; ok {
- return name
- }
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if publisher, ok := attrs["publisher"].(string); ok {
+ return publisher
+ }
+ if publisher, ok := attrs["publisher"].(map[string]string); ok {
+ if name, ok := publisher["name"]; ok {
+ return name
}
}
}
@@ -131,12 +131,14 @@ func getObjectPublisher(object map[string]interface{}) string {
func getObjectPublication(object map[string]interface{}) string {
if data, ok := getObjectData(object); ok {
if attributes, ok := getObjectDataAttributes(data); ok {
- if items, ok := attributes["relatedItems"].([]map[string]interface{}); ok {
+ if items, ok := attributes["relatedItems"].([]interface{}); ok {
for _, item := range items {
- if relationType, ok := item["relationType"]; ok && relationType == "IsPublishedIn" {
- if titles, ok := item["titles"].([]map[string]interface{}); ok {
+ m := item.(map[string]interface{})
+ if relationType, ok := m["relationType"].(string); ok && relationType == "IsPublishedIn" {
+ if titles, ok := m["titles"].([]interface{}); ok {
for _, title := range titles {
- if val, ok := title["title"].(string); ok {
+ m := title.(map[string]interface{})
+ if val, ok := m["title"].(string); ok {
return val
}
}
@@ -149,15 +151,14 @@ func getObjectPublication(object map[string]interface{}) string {
return ""
}
-// getObjectObjectSeries
+// getObjectSeries
func getObjectSeries(object map[string]interface{}) string {
- if data, ok := getObjectData(object); ok {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if items, ok := attributes["relatedItems"].([]map[string]interface{}); ok {
- for _, item := range items {
- if issue, ok := item["issue"].(string); ok {
- return issue
- }
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if items, ok := attrs["relatedItems"].([]interface{}); ok {
+ for _, item := range items {
+ m := item.(map[string]interface{})
+ if issue, ok := m["issue"].(string); ok {
+ return issue
}
}
}
@@ -165,15 +166,14 @@ func getObjectSeries(object map[string]interface{}) string {
return ""
}
-// getObjectObjectVolume
+// getObjectVolume
func getObjectVolume(object map[string]interface{}) string {
- if data, ok := getObjectData(object); ok {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if items, ok := attributes["relatedItems"].([]map[string]interface{}); ok {
- for _, item := range items {
- if issue, ok := item["volume"].(string); ok {
- return issue
- }
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if items, ok := attrs["relatedItems"].([]interface{}); ok {
+ for _, item := range items {
+ m := item.(map[string]interface{})
+ if issue, ok := m["volume"].(string); ok {
+ return issue
}
}
}
@@ -181,15 +181,14 @@ func getObjectVolume(object map[string]interface{}) string {
return ""
}
-// getObjectObjectIssue
+// getObjectIssue
func getObjectIssue(object map[string]interface{}) string {
- if data, ok := getObjectData(object); ok {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if items, ok := attributes["relatedItems"].([]map[string]interface{}); ok {
- for _, item := range items {
- if issue, ok := item["issue"].(string); ok {
- return issue
- }
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if items, ok := attrs["relatedItems"].([]interface{}); ok {
+ for _, item := range items {
+ m := item.(map[string]interface{})
+ if issue, ok := m["issue"].(string); ok {
+ return issue
}
}
}
@@ -197,24 +196,23 @@ func getObjectIssue(object map[string]interface{}) string {
return ""
}
-// getObjectObjectPublisherLocation
+// getObjectPublisherLocation
func getObjectPublisherLocation(object map[string]interface{}) string {
- /* Note sure where to find this. */
+ /* FIXME: Not sure where to find this. */
return ""
}
-// getObjectObjectPageRange
+// getObjectPageRange
func getObjectPageRange(object map[string]interface{}) string {
- if data, ok := getObjectData(object); ok {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if items, ok := attributes["relatedItems"].([]map[string]interface{}); ok {
- for _, item := range items {
- if firstPage, ok := item["firstPage"]; ok {
- if lastPage, ok := item["lastPage"]; ok {
- return fmt.Sprintf("%s - %s", firstPage, lastPage)
- }
- return fmt.Sprintf("%s - %s", firstPage, firstPage)
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if items, ok := attrs["relatedItems"].([]interface{}); ok {
+ for _, item := range items {
+ m := item.(map[string]interface{})
+ if firstPage, ok := m["firstPage"].(string); ok {
+ if lastPage, ok := m["lastPage"].(string); ok {
+ return fmt.Sprintf("%s - %s", firstPage, lastPage)
}
+ return fmt.Sprintf("%s - %s", firstPage, firstPage)
}
}
}
@@ -224,22 +222,20 @@ func getObjectPageRange(object map[string]interface{}) string {
// getObjectArticleNumber
func getObjectArticleNumber(object map[string]interface{}) string {
- /* FIXME: Not sure where article numbers map from in the DataCite API
- */
+ /* FIXME: Not sure where article numbers map from in the DataCite API */
return ""
}
// getObjectISBNs
func getObjectISBNs(object map[string]interface{}) []string {
isbns := []string{}
- if data, ok := getObjectData(object); ok {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if identifiers, ok := attributes["relatedIdentifiers"]; ok {
- for _, identifier := range identifiers.([]map[string]interface{}) {
- if identifierType, ok := identifier["relatedIdentifierType"]; ok && identifierType == "ISBN" {
- if val, ok := identifier["relatedIdentifier"].(string); ok {
- isbns = append(isbns, val)
- }
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if identifiers, ok := attrs["relatedIdentifiers"]; ok {
+ for _, item := range identifiers.([]interface{}) {
+ m := item.(map[string]interface{})
+ if identifierType, ok := m["relatedIdentifierType"].(string); ok && identifierType == "ISBN" {
+ if val, ok := m["relatedIdentifier"].(string); ok {
+ isbns = append(isbns, val)
}
}
}
@@ -249,16 +245,15 @@ func getObjectISBNs(object map[string]interface{}) []string {
}
// getObjectISSNs
-func getObjectISSNs(object map[string]interface{}) []string {
+func getObjectISSNs(object map[string]interface{}) []string {
issns := []string{}
- if data, ok := getObjectData(object); ok {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if identifiers, ok := attributes["relatedIdentifiers"]; ok {
- for _, identifier := range identifiers.([]map[string]interface{}) {
- if identifierType, ok := identifier["relatedIdentifierType"]; ok && identifierType == "ISSN" {
- if val, ok := identifier["relatedIdentifier"].(string); ok {
- issns = append(issns, val)
- }
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if identifiers, ok := attrs["relatedIdentifiers"].([]interface{}); ok {
+ for _, item := range identifiers {
+ m := item.(map[string]interface{})
+ if identifierType, ok := m["relatedIdentifierType"].(string); ok && identifierType == "ISSN" {
+ if val, ok := m["relatedIdentifier"].(string); ok {
+ issns = append(issns, val)
}
}
}
@@ -269,17 +264,37 @@ func getObjectISSNs(object map[string]interface{}) []string {
// getObjectFunding
func getObjectFunding(object map[string]interface{}) []*simplified.Funder {
- /* FIXME: Need to find example of where this is in DataCite JSON */
+ if attrs, ok := getObjectDataAttributes(object); ok { // funders are built from attrs["fundingReferences"] (funderName, awardNumber)
+ funders := []*simplified.Funder{}
+ if fundingReferences, ok := attrs["fundingReferences"].([]interface{}); ok {
+ for _, item := range fundingReferences {
+ m, _ := item.(map[string]interface{}) // a non-object entry yields a nil map; neither field matches, so the entry is dropped below
+ funder := new(simplified.Funder)
+ if funderName, ok := m["funderName"].(string); ok {
+ funder.Funder = new(simplified.FunderIdentifier)
+ funder.Funder.Name = funderName
+ }
+ if awardNumber, ok := m["awardNumber"].(string); ok {
+ funder.Award = new(simplified.AwardIdentifier)
+ funder.Award.Number = awardNumber
+ }
+ if funder.Funder != nil || funder.Award != nil { // keep only entries that carried a funder name or an award number
+ funders = append(funders, funder)
+ }
+ }
+ }
+ if len(funders) > 0 { // an empty result is reported as nil
+ return funders
+ }
+ }
return nil
}
// getObjectDOI
func getObjectDOI(object map[string]interface{}) string {
- if data, ok := getObjectData(object); ok {
- if attributes, ok := getObjectDataAttributes(data); ok {
- if doi, ok := attributes["doi"].(string); ok {
- return doi
- }
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if doi, ok := attrs["doi"].(string); ok {
+ return doi
}
}
return ""
@@ -288,38 +303,116 @@ func getObjectDOI(object map[string]interface{}) string {
// getObjectLinks
func getObjectLinks(object map[string]interface{}) []*simplified.Identifier {
/* FIXME: Need to find an example of where this is in DataCite JSON */
+ if attrs, ok := getObjectDataAttributes(object); ok { // links are taken from attrs["contentUrl"]
+ links := []*simplified.Identifier{}
+ if url, ok := attrs["contentUrl"].(string); ok { // NOTE(review): only a string value is handled; confirm DataCite never delivers contentUrl as a list
+ identifier := new(simplified.Identifier)
+ identifier.Scheme, identifier.Identifier = "url", url
+ links = append(links, identifier) // the identifier was previously built but never appended, so nil was always returned
+ }
+ if len(links) > 0 {
+ return links
+ }
+ }
return nil
}
-func crosswalkObjectAuthorAffiliationToCreatorAffiliation(object map[string]interface{}) *simplified.Affiliation {
- /* FIXME: NEed to find an example of where this is in DataCite JSON */
- return nil
-}
+func getObjectAgents(object map[string]interface{}, agentType string) []*simplified.Creator {
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if peopleOrGroups, ok := attrs[agentType].([]interface{}); ok { // agentType is the attribute key; callers pass "creators" or "contributors"
+ agents := []*simplified.Creator{}
+ for _, item := range peopleOrGroups {
+ entity, _ := item.(map[string]interface{}) // a non-object entry yields a nil map; it ends up with no name and is skipped below
+ agent := new(simplified.Creator)
+ agent.PersonOrOrg = new(simplified.PersonOrOrg)
+ if name, ok := entity["name"].(string); ok {
+ agent.PersonOrOrg.Name = name
+ }
+ if family, ok := entity["familyName"].(string); ok {
+ agent.PersonOrOrg.FamilyName = family
+ }
+ if given, ok := entity["givenName"].(string); ok {
+ agent.PersonOrOrg.GivenName = given
+ }
+ if nameIdentifiers, ok := entity["nameIdentifiers"].([]interface{}); ok {
+ agent.PersonOrOrg.Identifiers = []*simplified.Identifier{}
+ for _, value := range nameIdentifiers {
+ if m, ok := value.(map[string]interface{}); ok {
+ id := &simplified.Identifier{}
+ if val, ok := m["nameIdentifier"].(string); ok {
+ if scheme, ok := m["nameIdentifierScheme"].(string); ok {
+ id.Scheme = scheme
+ if scheme == "ROR" { // ROR values are stored in ID, every other scheme in Identifier
+ id.ID = val
+ } else {
+ id.Identifier = val
+ }
+ agent.PersonOrOrg.Identifiers = append(agent.PersonOrOrg.Identifiers, id)
+ }
+ }
+ }
+ }
-func crosswalkObjectPersonToCreator(object map[string]interface{}) *simplified.Creator {
- /* FIXME: Need to figure this in DataCite JSON */
+ }
+ if agent.PersonOrOrg.Name != "" || agent.PersonOrOrg.FamilyName != "" { // agents without a name or family name are dropped
+ agents = append(agents, agent)
+ }
+ }
+ return agents
+ }
+ }
return nil
}
func getObjectCreators(object map[string]interface{}) []*simplified.Creator {
- creators := []*simplified.Creator{}
- /* FIXME: Need to figure this out in DataCite JSON */
- return creators
+ return getObjectAgents(object, "creators")
}
func getObjectContributors(object map[string]interface{}) []*simplified.Creator {
- creators := []*simplified.Creator{}
- /* FIXME: Need to figure this out in DataCITE JSON */
- return creators
+ return getObjectAgents(object, "contributors")
}
func getObjectLicenses(object map[string]interface{}) []*simplified.Right {
- /* FIXME: Need ot figure this out in DataCite JSON */
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if rightsList, ok := attrs["rightsList"].([]interface{}); ok { // licenses are built from attrs["rightsList"] (rights, rightsIdentifier)
+ licenses := []*simplified.Right{}
+ for _, item := range rightsList {
+ license := new(simplified.Right)
+ license.Title = map[string]string{}
+ m, _ := item.(map[string]interface{}) // a non-object entry yields a nil map; it gets no title or ID and is dropped below
+ if title, ok := m["rights"].(string); ok {
+ license.Title["en"] = title // the rights text is always stored under the "en" key
+ }
+ if identifier, ok := m["rightsIdentifier"].(string); ok {
+ license.ID = identifier
+ }
+ if license.ID != "" || len(license.Title) > 0 {
+ licenses = append(licenses, license)
+ }
+ }
+ if len(licenses) > 0 { // an empty result is reported as nil
+ return licenses
+ }
+ }
+ }
return nil
}
func getObjectSubjects(object map[string]interface{}) []*simplified.Subject {
- /* FIXME: Need to figure this out in DataCite JSON */
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if items, ok := attrs["subjects"].([]interface{}); ok { // subjects are built from the "subject" field of each attrs["subjects"] entry
+ subjects := []*simplified.Subject{}
+ for _, item := range items {
+ m, _ := item.(map[string]interface{}) // a non-object entry yields a nil map and is skipped instead of panicking
+ if s, ok := m["subject"].(string); ok { // checked assertion: a null or non-string subject no longer panics
+ subject := new(simplified.Subject)
+ subject.Subject = s
+ subjects = append(subjects, subject)
+ }
+ }
+ return subjects
+ }
+ }
return nil
}
@@ -333,8 +426,41 @@ func getObjectPublishedOnline(object map[string]interface{}) *simplified.DateTyp
return nil
}
+// mapDatesToType takes a list of date objects and maps each lower-cased dateType to its date; the first date seen for a type wins.
+func mapDatesToType(items []interface{}) map[string]string {
+ dtMap := map[string]string{}
+ for _, item := range items {
+ m, _ := item.(map[string]interface{}) // a non-object entry yields a nil map and is skipped instead of panicking
+ if dateType, ok := m["dateType"].(string); ok {
+ dateType = strings.ToLower(dateType)
+ if val, ok := m["date"].(string); ok {
+ // Always take the first version of the provided types (no overwriting)
+ if _, conflict := dtMap[dateType]; !conflict { // must consult the result map; checking the source item m never detected a repeat
+ dtMap[dateType] = val
+ }
+ }
+ }
+ }
+ return dtMap
+}
func getObjectPublicationDate(object map[string]interface{}) string {
- /* FIXME: Need to figure this out in DataCite JSON */
+ if attrs, ok := getObjectDataAttributes(object); ok { // preference order: attrs["published"], then attrs["dates"], then attrs["publicationYear"]
+ if published, ok := attrs["published"].(string); ok {
+ return published
+ }
+ if items, ok := attrs["dates"].([]interface{}); ok {
+ // Otherwise take the first of "published", "issued", "accepted" present in dates
+ dates := mapDatesToType(items)
+ for _, dateType := range []string{"published", "issued", "accepted"} {
+ if val, ok := dates[dateType]; ok {
+ return val
+ }
+ }
+ }
+ if publicationYear, ok := attrs["publicationYear"]; ok && publicationYear != nil {
+ return fmt.Sprintf("%v", publicationYear) // decoded JSON numbers are float64 (or json.Number), never int; %v renders 2018 as "2018" for either
+ }
+ }
return ""
}
@@ -368,6 +494,15 @@ func normalizeObjectPublisherName(val string, object map[string]interface{}, opt
return val
}
+func getObjectIdentifier(object map[string]interface{}) string {
+ if attrs, ok := getObjectDataAttributes(object); ok {
+ if identifier, ok := attrs["identifier"].(string); ok {
+ return identifier
+ }
+ }
+ return ""
+}
+
// CrosswalkDataCiteObject takes a Object object from the DataCite API
// and maps the fields into an simplified Record struct return a
// new struct or error.
@@ -376,6 +511,8 @@ func CrosswalkDataCiteObject(cfg *Config, object map[string]interface{}, options
return nil, fmt.Errorf("crossref api objects not populated")
}
rec := new(simplified.Record)
+ rec.Metadata = new(simplified.Metadata)
+
// .message.type -> .record.metadata.resource_type (via controlled vocabulary)
if value := getObjectResourceType(object); value != "" {
if err := SetResourceType(rec, value, options.ResourceTypes); err != nil {
@@ -387,22 +524,13 @@ func CrosswalkDataCiteObject(cfg *Config, object map[string]interface{}, options
return nil, err
}
}
- if values := getObjectTitles(object); len(values) > 0 {
- for i, val := range values {
- if i == 0 {
- if err := SetTitle(rec, val); err != nil {
- return nil, err
- }
- } else {
- if err := AddAdditionalTitles(rec, makeSimpleTitleDetail(val)); err != nil {
- return nil, err
- }
- }
+ if val := getObjectTitle(object); val != "" {
+ if err := SetTitle(rec, val); err != nil {
+ return nil, err
}
}
- // NOTE: Abstract becomes Description in simplified records
- if value := getObjectAbstract(object); value != "" {
- if err := SetDescription(rec, value); err != nil {
+ if val := getObjectDescription(object); val != "" {
+ if err := SetDescription(rec, val); err != nil {
return nil, err
}
}
@@ -416,80 +544,70 @@ func CrosswalkDataCiteObject(cfg *Config, object map[string]interface{}, options
return nil, err
}
}
- if value := getObjectPublisher(object); value != "" {
- // FIXME: Setting the publisher name is going to be normalized via DOI prefix, maybe ISSN?
- value := normalizeObjectPublisherName(value, object, options)
- if err := SetPublisher(rec, value); err != nil {
+ if values := getObjectLicenses(object); values != nil {
+ if err := AddRights(rec, values); err != nil {
return nil, err
}
}
- if value := getObjectPublication(object); value != "" {
- if err := SetPublication(rec, value); err != nil {
+ if values := getObjectSubjects(object); values != nil {
+ if err := AddSubjects(rec, values); err != nil {
return nil, err
}
}
- /* FIXME: Need to know where this it's assigned in simplified model.
- Also the data I fetch from DataCite now looks like an alternate short
- title so objects.message["short-container-title"] may not be the right
- place to fetch this data.
- if value := getObjectSeries(object); value != "" {
- if err := SetSeries(rec, value); err != nil {
+ if values := getObjectFunding(object); values != nil && len(values) > 0 {
+ if err := SetFunding(rec, values); err != nil {
return nil, err
}
}
- */
- if value := getObjectVolume(object); value != "" {
- if err := SetVolume(rec, value); err != nil {
+ if val := getObjectPublisher(object); val != "" {
+ // NOTE: Setting the publisher name is going to be normalized via DOI prefix for records with ISSN.
+ val = normalizeObjectPublisherName(val, object, options)
+ if err := SetPublisher(rec, val); err != nil {
return nil, err
}
}
- if value := getObjectIssue(object); value != "" {
- if err := SetIssue(rec, value); err != nil {
+ if values := getObjectISBNs(object); values != nil && len(values) > 0 {
+ if err := SetImprintField(rec, "isbn", values); err != nil {
return nil, err
}
}
- if value := getObjectPublisherLocation(object); value != "" {
- if err := SetPublisherLocation(rec, value); err != nil {
+ if values := getObjectISSNs(object); len(values) > 0 {
+ if err := SetJournalField(rec, "issn", values[0]); err != nil {
return nil, err
}
- }
- if value := getObjectPageRange(object); value != "" {
- if err := SetPageRange(rec, value); err != nil {
- return nil, err
+ if len(values) > 1 {
+ for i := 1; i < len(values); i++ {
+ AddIdentifier(rec, "issn", values[i])
+ }
}
}
- if value := getObjectArticleNumber(object); value != "" {
- if err := SetArticleNumber(rec, value); err != nil {
+ if value := getObjectPublication(object); value != "" {
+ if err := SetPublication(rec, value); err != nil {
return nil, err
}
}
- if values := getObjectISBNs(object); values != nil && len(values) > 0 {
- if err := SetImprintField(rec, "isbn", values); err != nil {
+ if value := getObjectVolume(object); value != "" {
+ if err := SetVolume(rec, value); err != nil {
return nil, err
}
}
- if values := getObjectISSNs(object); len(values) > 0 {
- if err := SetJournalField(rec, "issn", values[0]); err != nil {
+ if value := getObjectIssue(object); value != "" {
+ if err := SetIssue(rec, value); err != nil {
return nil, err
}
- if len(values) > 1 {
- for i := 1; i < len(values); i++ {
- AddIdentifier(rec, "issn", values[i])
- }
- }
}
- if values := getObjectFunding(object); values != nil && len(values) > 0 {
- if err := SetFunding(rec, values); err != nil {
+ if value := getObjectPublisherLocation(object); value != "" {
+ if err := SetPublisherLocation(rec, value); err != nil {
return nil, err
}
}
- if values := getObjectLicenses(object); values != nil {
- if err := AddRights(rec, values); err != nil {
+ if value := getObjectPageRange(object); value != "" {
+ if err := SetPageRange(rec, value); err != nil {
return nil, err
}
}
- if values := getObjectSubjects(object); values != nil {
- if err := AddSubjects(rec, values); err != nil {
+ if value := getObjectArticleNumber(object); value != "" {
+ if err := SetArticleNumber(rec, value); err != nil {
return nil, err
}
}
diff --git a/datacite_test.go b/datacite_test.go
new file mode 100644
index 00000000..cdceef08
--- /dev/null
+++ b/datacite_test.go
@@ -0,0 +1,122 @@
+package irdmtools
+
+import (
+ //"fmt"
+ //"encoding/json"
+ "testing"
+)
+
+func TestQueryDataCiteObject(t *testing.T) {
+ ids := []string{
+ "10.22002/36sg9-yhj98",
+ "10.22002/D1.868",
+ "arXiv.2404.01326",
+ "arXiv:2312.07215",
+ "arXiv:2305.06519",
+ "arXiv:2312.03791",
+ "arXiv:2305.19279",
+ "arXiv:2305.05315",
+ "arXiv:2305.07673",
+ "arXiv:2111.03606",
+ "arXiv:2112.06016",
+ }
+ cfg := new(Config)
+ options := new(Doi2RdmOptions)
+ options.MailTo = "test@example.edu"
+ for _, id := range ids {
+ work, err := QueryDataCiteObject(cfg, id, options)
+ if err != nil {
+ t.Error(err)
+ }
+ title := getObjectTitle(work)
+ description := getObjectDescription(work)
+ doi := getObjectDOI(work)
+ publicationDate := getObjectPublicationDate(work)
+ if title == "" {
+ t.Errorf("expected title for %q, got empty string", id)
+ }
+ if description == "" {
+ t.Errorf("expected description for %q, got empty string", id)
+ }
+ if doi == "" {
+ t.Errorf("expected doi for %q, got empty string", id)
+ }
+ if publicationDate == "" {
+ t.Errorf("expected publicationDate for %q, got empty string", id)
+ }
+ }
+}
+
+func TestQueryDatasetDOI(t *testing.T) {
+ data := map[string]map[string]interface{}{
+ "10.22002/d1.868": map[string]interface{}{
+ "title": "caltechlibrary/dataset: issues #13, #14, #15 fixes",
+ "author": []map[string]string{
+ map[string]string{
+ "literal": "Robert Doiel",
+ },
+ },
+ "description": "This release is primarily implementing fixes.",
+ "doi": "10.22002/d1.868",
+ "identifier": "https://doi.org/10.22002/d1.868",
+ "published": "2018",
+ },
+ }
+
+ cfg := new(Config)
+ options := new(Doi2RdmOptions)
+ options.MailTo = "test@example.edu"
+ for doi, expected := range data {
+ m, err := QueryDataCiteObject(cfg, doi, options)
+ if err != nil {
+ t.Error(err)
+ }
+ if m == nil || len(m) == 0 {
+ t.Errorf("no data returned for doi %q", doi)
+ }
+ expectedS := expected["title"]
+ gotS := getObjectTitle(m)
+ if expectedS != gotS {
+ t.Errorf("expected (%q) to get %q, got %q", doi, expectedS, gotS)
+ }
+ creators := getObjectCreators(m)
+ if creators != nil {
+ for i, creator := range creators {
+ if i > 0 {
+ t.Errorf("got too many authors, %+v", creators)
+ break
+ }
+ if creator.PersonOrOrg == nil {
+ t.Errorf("creator.PersonOrOrg is nil")
+ } else if creator.PersonOrOrg.Name != "Robert Doiel" {
+ t.Errorf("expected \"Robert Doiel\", got %q", creator.PersonOrOrg.Name)
+ }
+ }
+ } else {
+ t.Errorf("expected authors, got nil")
+ }
+ doiVal := getObjectDOI(m)
+ if doi != doiVal {
+ t.Errorf("expected DOI (%T) %q, got (%T) %q", doi, doi, doiVal, doiVal)
+ }
+ expectedS = "This release is primarily implementing fixes."
+ gotS = getObjectDescription(m)
+ if expectedS != gotS {
+ t.Errorf("expected description %q, got %q", expectedS, gotS)
+ }
+ expectedS = "2018"
+ gotS = getObjectPublicationDate(m)
+ if expectedS != gotS {
+ t.Errorf("expected published %q, got %q", expectedS, gotS)
+ }
+ expectedS = "https://doi.org/10.22002/d1.868"
+ gotS = getObjectIdentifier(m)
+ if expectedS != gotS {
+ t.Errorf("expected identifier %q, got %q", expectedS, gotS)
+ }
+ /*
+ src, _ := json.MarshalIndent(m, "", " ")
+ fmt.Printf("DEBUG m ->\n%s\n", src)
+ */
+ }
+}
diff --git a/doi2rdm.1.html b/doi2rdm.1.html
index aa9b9d0a..8dbccc59 100644
--- a/doi2rdm.1.html
+++ b/doi2rdm.1.html
@@ -27,7 +27,7 @@ NAME
doi2rdm
SYNOPSIS
doi2rdm OPTIONS OPTIONS_YAML crossref|datacite DOI
+href="#options_yaml">OPTIONS_YAML [crossref|datacite] DOI
DESCRIPTION
doi2rdm is a Caltech Library oriented command line application that
takes a DOI, queries the CrossRef or DataCite API then returns a JSON
@@ -67,15 +67,19 @@
OPTIONS
EXAMPLES
+Save the default YAML options to a file. You can customize this to
+match your vocabulary requirements in your RDM deployment.
+ doi2rdm -show-yaml >options.yaml
Example generating a JSON document for a single DOI. The resulting
text file is called “article.json”. In this example “options.yaml” is
-the configuration file for setup for your RDM instance.
- doi2rdm options.yaml crossref "10.1021/acsami.7b15651" >article.json
+the configuration file for setup for your RDM instance. It’ll first
+check CrossRef then DataCite.
+ doi2rdm options.yaml "10.1021/acsami.7b15651" >article.json
Check to see the difference from the saved “article.json” and the
-current metadata retrieved from CrossRef.
- doi2rdm -diff article.json crossref doi2rdm.yaml "10.1021/acsami.7b15651
-Save the default YAML options to a file.
- doi2rdm -show-yaml >options.yaml
+current metadata retrieved from CrossRef or DataCite.
+ doi2rdm -diff article.json options.yaml "10.1021/acsami.7b15651"
+Example getting metadata for an arXiv record from DataCite
+ doi2rdm options.yaml "arXiv:2312.07215"
diff --git a/doi2rdm.1.md b/doi2rdm.1.md
index 2e56045b..c704a910 100644
--- a/doi2rdm.1.md
+++ b/doi2rdm.1.md
@@ -1,6 +1,6 @@
-%doi2rdm(1) irdmtools user manual | version 0.0.76 1203a801
+%doi2rdm(1) irdmtools user manual | version 0.0.78 95ba3295
% R. S. Doiel and Tom Morrell
-% 2024-03-28
+% 2024-04-12
# NAME
@@ -8,7 +8,7 @@ doi2rdm
# SYNOPSIS
-doi2rdm [OPTIONS] [OPTIONS_YAML] crossref|datacite DOI
+doi2rdm [OPTIONS] [OPTIONS_YAML] [crossref|datacite] DOI
# DESCRIPTION
@@ -46,24 +46,33 @@ migrating content from CrossRef to RDM.
# EXAMPLES
+Save the default YAML options to a file. You can customize this to match your
+vocabulary requirements in your RDM deployment.
+
+~~~
+ doi2rdm -show-yaml >options.yaml
+~~~
+
Example generating a JSON document for a single DOI. The resulting
text file is called "article.json". In this example "options.yaml"
-is the configuration file for setup for your RDM instance.
+is the configuration file for setup for your RDM instance. It'll first
+check CrossRef then DataCite.
~~~
- doi2rdm options.yaml crossref "10.1021/acsami.7b15651" >article.json
+ doi2rdm options.yaml "10.1021/acsami.7b15651" >article.json
~~~
Check to see the difference from the saved "article.json" and
-the current metadata retrieved from CrossRef.
+the current metadata retrieved from CrossRef or DataCite.
~~~
- doi2rdm -diff article.json crossref doi2rdm.yaml "10.1021/acsami.7b15651
+ doi2rdm -diff article.json options.yaml "10.1021/acsami.7b15651"
~~~
-Save the default YAML options to a file.
+Example getting metadata for an arXiv record from DataCite
~~~
- doi2rdm -show-yaml >options.yaml
+ doi2rdm options.yaml "arXiv:2312.07215"
~~~
+
diff --git a/doi2rdm.go b/doi2rdm.go
index b82b1dcc..611111ad 100644
--- a/doi2rdm.go
+++ b/doi2rdm.go
@@ -38,6 +38,7 @@ import (
"fmt"
"io"
"os"
+ "strings"
// 3rd Party packages
"gopkg.in/yaml.v3"
@@ -53,14 +54,14 @@ type Doi2Rdm struct {
}
type Doi2RdmOptions struct {
- MailTo string `json:"mailto,omitempty" yaml:"mailto"`
- Download bool `json:"download,omitempty" yaml:"download"`
- DotInitials bool `json:"dot_initials,omitempty" yaml:"dot_initials"`
- ContributorTypes map[string]string `json:"contributor_types,omitemptpy" yaml:"contributor_types"`
- ResourceTypes map[string]string `json:"resource_types,omitempty" yaml:"resource_types"`
+ MailTo string `json:"mailto,omitempty" yaml:"mailto"`
+ Download bool `json:"download,omitempty" yaml:"download"`
+ DotInitials bool `json:"dot_initials,omitempty" yaml:"dot_initials"`
+ ContributorTypes map[string]string `json:"contributor_types,omitempty" yaml:"contributor_types"` // was misspelled "omitemptpy", an unknown option encoding/json ignores
+ ResourceTypes map[string]string `json:"resource_types,omitempty" yaml:"resource_types"`
DoiPrefixPublishers map[string]string `json:"doi_prefix_publishers,omitempty" yaml:"doi_prefix_publishers"`
- ISSNPublishers map[string]string `json:"issn_publishers,omitempty" yaml:"issn_publishers"`
- Debug bool `json:"debug,omitempty" yaml:"debug"`
+ ISSNPublishers map[string]string `json:"issn_publishers,omitempty" yaml:"issn_publishers"`
+ Debug bool `json:"debug,omitempty" yaml:"debug"`
}
var (
@@ -246,13 +247,13 @@ func (app *Doi2Rdm) Configure(configFName string, envPrefix string, debug bool)
// ```
//
// app := new(irdmtools.Doi2Rdm)
-// // Load irdmtools settings
+// // Load irdmtools settings
// if err := app.LoadConfig("irdmtools.json"); err != nil {
// // ... handle error ...
// }
-// // If options are provided then we need to set the filename
-// optionsFName := "doi2rdm.yaml"
-// doi := "10.48550/arXiv.2104.02480"
+// // If options are provided then we need to set the filename
+// optionsFName := "doi2rdm.yaml"
+// doi := "10.3847/1538-3881/ad2765"
// src, err := app.Run(os.Stdin, os.Stdout, os.Stderr, optionFName, doi, "", false)
// if err != nil {
// // ... handle error ...
@@ -327,19 +328,19 @@ func (app *Doi2Rdm) RunCrossRefToRdm(in io.Reader, out io.Writer, eout io.Writer
//
// ```
//
-// app := new(irdmtools.Doi2Rdm)
-// // Load irdmtools settings
-// if err := app.LoadConfig("irdmtools.json"); err != nil {
-// // ... handle error ...
-// }
-// // If options are provided then we need to set the filename
-// optionsFName := "doi2rdm.yaml"
-// doi := "10.48550/arXiv.2104.02480"
-// src, err := app.RunDataCiteToRdm(os.Stdin, os.Stdout, os.Stderr, optionFName, doi, "", false)
-// if err != nil {
-// // ... handle error ...
-// }
-// fmt.Printf("%s\n", src)
+// app := new(irdmtools.Doi2Rdm)
+// // Load irdmtools settings
+// if err := app.LoadConfig("irdmtools.json"); err != nil {
+// // ... handle error ...
+// }
+// // If options are provided then we need to set the filename
+// optionsFName := "doi2rdm.yaml"
+// doi := "10.48550/arXiv.2104.02480"
+// src, err := app.RunDataCiteToRdm(os.Stdin, os.Stdout, os.Stderr, optionFName, doi, "", false)
+// if err != nil {
+// // ... handle error ...
+// }
+// fmt.Printf("%s\n", src)
//
// ```
func (app *Doi2Rdm) RunDataCiteToRdm(in io.Reader, out io.Writer, eout io.Writer, optionFName, doi string, diffFName string) error {
@@ -387,6 +388,9 @@ func (app *Doi2Rdm) RunDataCiteToRdm(in io.Reader, out io.Writer, eout io.Writer
if err != nil {
return err
}
+ if len(nWork) == 0 { // an empty object cannot be crosswalked, so report it before calling CrosswalkDataCiteObject
+ return fmt.Errorf("no data received for %q", doi)
+ }
nRecord, err = CrosswalkDataCiteObject(app.Cfg, nWork, options)
if err != nil {
return err
@@ -402,3 +406,39 @@ func (app *Doi2Rdm) RunDataCiteToRdm(in io.Reader, out io.Writer, eout io.Writer
fmt.Fprintf(out, "%s\n", src)
return nil
}
+
+// RunDoiToRdmCombined implements the doi2rdm cli behaviors using the CrossRef and DataCite services.
+// Ids starting with "arxiv:" (any letter case) go straight to DataCite; all others try CrossRef first, then DataCite if CrossRef returns an error.
+// With the exception of the "setup" action you should call `app.LoadConfig()` before executing it.
+//
+// ```
+//
+// app := new(irdmtools.Doi2Rdm)
+// // Load irdmtools settings
+// if err := app.LoadConfig("irdmtools.json"); err != nil {
+// // ... handle error ...
+// }
+// // If options are provided then we need to set the filename
+// optionsFName := "doi2rdm.yaml"
+// doi := "10.48550/arXiv.2104.02480"
+// err := app.RunDoiToRdmCombined(os.Stdin, os.Stdout, os.Stderr, optionsFName, doi, "")
+// if err != nil {
+// // ... handle error ...
+// }
+// // the only return value is the error; RunDataCiteToRdm writes the record JSON to out
+//
+// ```
+func (app *Doi2Rdm) RunDoiToRdmCombined(in io.Reader, out io.Writer, eout io.Writer, optionFName, doi string, diffFName string) error {
+ // Do we have an arXiv id?
+ if strings.HasPrefix(strings.ToLower(doi), "arxiv:") {
+ return app.RunDataCiteToRdm(in, out, eout, optionFName, doi, diffFName)
+ }
+ if crErr := app.RunCrossRefToRdm(in, out, eout, optionFName, doi, diffFName); crErr != nil {
+ // Then try DataCiteToRdm
+ if dcErr := app.RunDataCiteToRdm(in, out, eout, optionFName, doi, diffFName); dcErr != nil {
+ return fmt.Errorf("crossref: %w, datacite: %w", crErr, dcErr) // %w keeps both causes reachable through errors.Is / errors.As; the message text is unchanged
+ }
+ return nil
+ }
+ return nil
+}
diff --git a/doi2rdm_test.go b/doi2rdm_test.go
new file mode 100644
index 00000000..9d13884a
--- /dev/null
+++ b/doi2rdm_test.go
@@ -0,0 +1,54 @@
+package irdmtools
+
+import (
+ //"os"
+ "testing"
+)
+
+func TestDataCiteToRDM(t *testing.T) {
+ //FIXME: Need an arxiv DOI to look up at DataCite
+ arxiv_ids := []string{
+ "10.22002/D1.868",
+ "arXiv:2312.07215",
+ "arXiv:2305.06519",
+ "arXiv:2312.03791",
+ "arXiv:2305.19279",
+ "arXiv:2305.05315",
+ "arXiv:2305.07673",
+ "arXiv:2111.03606",
+ "arXiv:2112.06016",
+ /* these arXiv ids don't seem to have data at DataCite ... */
+ //"arXiv:2402.12335v1",
+ //"arXiv:2401.12460v1",
+ //"arXiv:2204.13532v2",
+ }
+ app := new(Doi2Rdm)
+ app.Cfg = new(Config)
+ for _, doi := range arxiv_ids {
+ options := new(Doi2RdmOptions)
+ options.MailTo = "dld-test@library.caltech.edu"
+ obj, err := QueryDataCiteObject(app.Cfg, doi, options)
+ if err != nil {
+ t.Error(err)
+ }
+ if obj == nil {
+ t.Errorf("expected a non-nil object for %q", doi)
+ }
+ record, err := CrosswalkDataCiteObject(app.Cfg, obj, options)
+ if err != nil {
+ t.Errorf("%s", err)
+ t.FailNow()
+ }
+ if record == nil {
+ t.Errorf("expected a non-nil record for doi %q", doi)
+ t.FailNow()
+ }
+ /*
+ optionsFName, diffFName := "", ""
+ if err := app.RunDataCiteToRdm(os.Stdin, os.Stdout, os.Stderr, optionsFName, doi, diffFName); err != nil {
+ t.Error(err)
+ t.FailNow()
+ }
+ */
+ }
+}
diff --git a/ep3ds2citations.1.md b/ep3ds2citations.1.md
index f9ce713f..b53bedb3 100644
--- a/ep3ds2citations.1.md
+++ b/ep3ds2citations.1.md
@@ -1,6 +1,6 @@
-%ep3ds2citations(1) irdmtools user manual | version 0.0.76 1203a801
+%ep3ds2citations(1) irdmtools user manual | version 0.0.78 95ba3295
% R. S. Doiel and Tom Morrell
-% 2024-03-28
+% 2024-04-12
# NAME
diff --git a/ep3util.1.md b/ep3util.1.md
index 1672694a..272bd01b 100644
--- a/ep3util.1.md
+++ b/ep3util.1.md
@@ -1,6 +1,6 @@
-%ep3util(1) irdmtools user manual | version 0.0.76 1203a801
+%ep3util(1) irdmtools user manual | version 0.0.78 95ba3295
% R. S. Doiel and Tom Morrell
-% 2024-03-28
+% 2024-04-12
# NAME
diff --git a/eprint2rdm.1.md b/eprint2rdm.1.md
index aa419d20..e59b26e5 100644
--- a/eprint2rdm.1.md
+++ b/eprint2rdm.1.md
@@ -1,6 +1,6 @@
-%eprint2rdm(1) irdmtools user manual | version 0.0.76 1203a801
+%eprint2rdm(1) irdmtools user manual | version 0.0.78 95ba3295
% R. S. Doiel and Tom Morrell
-% 2024-03-28
+% 2024-04-12
# NAME
diff --git a/eprintrest.1.md b/eprintrest.1.md
index 5267fdcc..745e2fd6 100644
--- a/eprintrest.1.md
+++ b/eprintrest.1.md
@@ -1,6 +1,6 @@
-%eprintrest(1) irdmtools user manual | version 0.0.76 1203a801
+%eprintrest(1) irdmtools user manual | version 0.0.78 95ba3295
% R. S. Doiel and Tom Morrell
-% 2024-03-28
+% 2024-04-12
# NAME
diff --git a/go.mod b/go.mod
index b089f6bb..07a36417 100644
--- a/go.mod
+++ b/go.mod
@@ -1,28 +1,26 @@
module github.com/caltechlibrary/irdmtools
-go 1.22
+go 1.22.1
require (
github.com/caltechlibrary/crossrefapi v1.0.7
- github.com/caltechlibrary/dataciteapi v1.0.2
+ github.com/caltechlibrary/dataciteapi v1.1.0
github.com/caltechlibrary/dataset/v2 v2.1.10
github.com/caltechlibrary/eprinttools v1.3.7
github.com/caltechlibrary/pairtree v1.0.3
- github.com/caltechlibrary/simplified v0.0.22
+ github.com/caltechlibrary/simplified v0.0.23
github.com/go-sql-driver/mysql v1.7.1
github.com/lib/pq v1.10.9
gopkg.in/yaml.v3 v3.0.1
)
require (
- github.com/caltechlibrary/dataset v1.1.0 // indirect
github.com/caltechlibrary/doitools v0.0.2 // indirect
github.com/caltechlibrary/dotpath v0.0.4 // indirect
github.com/caltechlibrary/dsv1 v0.0.0-20220817192039-7c2741c5699d // indirect
github.com/caltechlibrary/semver v0.0.0-20220817184719-a504da2d5c6a // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/glebarez/go-sqlite v1.22.0 // indirect
- github.com/gofrs/uuid v4.4.0+incompatible // indirect
github.com/google/uuid v1.5.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
diff --git a/go.sum b/go.sum
index ed636398..527a3760 100644
--- a/go.sum
+++ b/go.sum
@@ -1,156 +1,59 @@
-github.com/caltechlibrary/crossrefapi v1.0.6 h1:N/W9ugyza6FZvF8WbUIKnzAWe7ZvJWQ/kw3292kdHwQ=
-github.com/caltechlibrary/crossrefapi v1.0.6/go.mod h1:TAUa0UzInUtw0Z155kWjSIU+ljvNVL6dCAInjC0C8Jc=
github.com/caltechlibrary/crossrefapi v1.0.7 h1:5ENH6TkeObClqJJtlomcj7EtRn6FG/l4PN8sMULCC3g=
github.com/caltechlibrary/crossrefapi v1.0.7/go.mod h1:faIavFUD3wzlYceE4lknT9+taxVGERhUTIeNKrPnU8M=
-github.com/caltechlibrary/dataciteapi v1.0.1 h1:qFkqdbEOT4trKcpqeLg08NUgDObMOgjAJhegP0sop8Q=
-github.com/caltechlibrary/dataciteapi v1.0.1/go.mod h1:T6oYVCoQyHRi/0qPI+K5PM//F7M5YkrHqzVMfrPlsn8=
github.com/caltechlibrary/dataciteapi v1.0.2 h1:9TIcndMXW2atdLbpPu9yQGPnUAZxlLBAR8lLjKKfTg0=
github.com/caltechlibrary/dataciteapi v1.0.2/go.mod h1:UmHR9MjWJ0jurkwWPlx2CN6GX92MlEPmUB1oL3QOGrs=
-github.com/caltechlibrary/dataset v1.1.0 h1:eunb0XhQrq3Ho/PcCNbV6J5JROHtAnqUXI+hQPtD6a0=
-github.com/caltechlibrary/dataset v1.1.0/go.mod h1:3zskOOrgkVo/ul5GRnSvS8W9/Aj2D45j+lYbcIEMSao=
-github.com/caltechlibrary/dataset/v2 v2.1.8 h1:K3M7eOR73xS3tttbjkEp/YDencRV0Yc4IUBwp9Wt+tA=
-github.com/caltechlibrary/dataset/v2 v2.1.8/go.mod h1:x5zjOk0PcHbm3a3Z4NyUB42Hf2/jTMAKFFV8dcTgnPY=
+github.com/caltechlibrary/dataciteapi v1.0.3 h1:ti+wpguAnkowKGfMBkZtCdbqoi1qRqY0HNwbD0YB618=
+github.com/caltechlibrary/dataciteapi v1.0.3/go.mod h1:ybVNJ+lO3RAAQK0X9G/fLs90+YjUHVani7BmqtOd2zg=
+github.com/caltechlibrary/dataciteapi v1.1.0 h1:w2AUa6w4KnZOrXUp5J8/BvalgoDUhEuB+EnG482GeLg=
+github.com/caltechlibrary/dataciteapi v1.1.0/go.mod h1:ybVNJ+lO3RAAQK0X9G/fLs90+YjUHVani7BmqtOd2zg=
github.com/caltechlibrary/dataset/v2 v2.1.10 h1:LUMPioJEsWSqQZgabljSClQIFvRUsx8Y97RCojCRk6w=
github.com/caltechlibrary/dataset/v2 v2.1.10/go.mod h1:drSS08cTl4KGhHvsT/kUZO0PghyBCAF/UpRZnm7INHk=
-github.com/caltechlibrary/doitools v0.0.1 h1:c6lvI5l9juo019GGUvuwbzeyv1c6VtSi8mkUv1drgwY=
-github.com/caltechlibrary/doitools v0.0.1/go.mod h1:1K36p/UIkIFM0JVaIVtFStKsFu5xjjYWMWAUExmFnP0=
github.com/caltechlibrary/doitools v0.0.2 h1:bGFbvEo9scKlYHCAgZCrQO63GwZZDTL7siT0rVTYwmc=
github.com/caltechlibrary/doitools v0.0.2/go.mod h1:fk83G4/el4EgvWxCwZLAeXvqzL816LdrFCHrkg7Zpp0=
github.com/caltechlibrary/dotpath v0.0.4 h1:ghc3XZefLPDhQnUo9oXqvz8vfh52QOE0BjK5qOD9RQU=
github.com/caltechlibrary/dotpath v0.0.4/go.mod h1:rAu0NPuhTaEa9szxXq92x/JmMmedjsEEdHl5uTxzzs0=
github.com/caltechlibrary/dsv1 v0.0.0-20220817192039-7c2741c5699d h1:SGz3rTkjsp/8tEkFXTRHaJxqyeeDiL210QD69Bn7bd0=
github.com/caltechlibrary/dsv1 v0.0.0-20220817192039-7c2741c5699d/go.mod h1:ajUo9ZOowgXjLfDXeUdMmJit9CZyHUzgaPIHUh9TBcg=
-github.com/caltechlibrary/eprinttools v1.3.6 h1:uhfvyDvY+paRlU56SWWix8viLEKAqZSl5ZYtTxFpFCI=
-github.com/caltechlibrary/eprinttools v1.3.6/go.mod h1:5qi6bfj5hmTBiNjlM7g539+NwxNJWNrj6wzCp1xCmlI=
-github.com/caltechlibrary/eprinttools v1.3.7-0.20240111185310-37e62dc06599 h1:jXjDyL2dEAo95rzK8BPMfAhcfD1MtVH8+AypAidcmEE=
-github.com/caltechlibrary/eprinttools v1.3.7-0.20240111185310-37e62dc06599/go.mod h1:iLGUXTfOrOm3P9SVzxIME0cqcOyy+oJzNBxjLNukavg=
-github.com/caltechlibrary/eprinttools v1.3.7-0.20240111193740-208de7f3f863 h1:qWPvJvYb+/gHYk5ya5RTx4PQcLYUi0qGhLG+tcJwtfQ=
-github.com/caltechlibrary/eprinttools v1.3.7-0.20240111193740-208de7f3f863/go.mod h1:iLGUXTfOrOm3P9SVzxIME0cqcOyy+oJzNBxjLNukavg=
github.com/caltechlibrary/eprinttools v1.3.7 h1:W2iPXhV6aQqFkvnyB+OCm4/abbm4JzNczaY6Xv3wukc=
github.com/caltechlibrary/eprinttools v1.3.7/go.mod h1:kdhX3fCCOQEzebbYspj17IJEZKnvRyRbRuku9U7Cs6k=
-github.com/caltechlibrary/pairtree v1.0.2 h1:zcsFibPW7tBvjOvbxHzkE1CIPm0BxUCfQrLCa+bB5qc=
-github.com/caltechlibrary/pairtree v1.0.2/go.mod h1:VZdWqmXgJ+tMxM37n/W+CjS8CQVPus+0ReFdRL10AS4=
github.com/caltechlibrary/pairtree v1.0.3 h1:ykaydbmdyI1Doszaw0rvScKSXcU7HbotCQpNTlotX7s=
github.com/caltechlibrary/pairtree v1.0.3/go.mod h1:7jeP5TyT9ilM+TTRklwrIbUWI/uGuQFm06vrhmgcS5U=
github.com/caltechlibrary/semver v0.0.0-20220817184719-a504da2d5c6a h1:3q6ct6FfFDF2dEiW06Ran7iEOZ4d9HBiMbkqJUOO2oU=
github.com/caltechlibrary/semver v0.0.0-20220817184719-a504da2d5c6a/go.mod h1:LxzDpCilL3QqjL8qdirgmlo8riDkSFNsTALfIYYyQtE=
-github.com/caltechlibrary/simplified v0.0.22 h1:OAAY/8NwLSD1LPMzj/wDCIMEsltmy1zAZQ98pXZhjtk=
-github.com/caltechlibrary/simplified v0.0.22/go.mod h1:T36AVk2yLIhH/0zlM3wTeVEFgcsSn2zqdqFVanntSPc=
-github.com/caltechlibrary/simplified v0.0.23-0.20231114225710-392ad9bfa56e h1:wSrrsl/q4BMuZcTTT7TqA1DKYk0if7tGAthZIOH2Ri8=
-github.com/caltechlibrary/simplified v0.0.23-0.20231114225710-392ad9bfa56e/go.mod h1:T36AVk2yLIhH/0zlM3wTeVEFgcsSn2zqdqFVanntSPc=
-github.com/caltechlibrary/simplified v0.0.23-0.20231114231003-718cd0a30ee5 h1:uj6XzSImzOQJFDLHbv8fGaasn/2aFDMeEw1Z1BasW5I=
-github.com/caltechlibrary/simplified v0.0.23-0.20231114231003-718cd0a30ee5/go.mod h1:T36AVk2yLIhH/0zlM3wTeVEFgcsSn2zqdqFVanntSPc=
-github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
+github.com/caltechlibrary/simplified v0.0.23 h1:uWJ/WEX7f82+o952QkxfLVO0XBXUIiCUdxwvDzUdJto=
+github.com/caltechlibrary/simplified v0.0.23/go.mod h1:GuYwUEySYBN6+PCNEttSPdM6QNw0lIzBM68aTLnNGDU=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
-github.com/glebarez/go-sqlite v1.18.1 h1:w0xtxKWktqYsUsXg//SQK+l1IcpKb3rGOQHmMptvL2U=
-github.com/glebarez/go-sqlite v1.18.1/go.mod h1:ydXIGq2M4OzF4YyNhH129SPp7jWoVvgkEgb6pldmS0s=
github.com/glebarez/go-sqlite v1.22.0 h1:uAcMJhaA6r3LHMTFgP0SifzgXg46yJkgxqyuyec+ruQ=
github.com/glebarez/go-sqlite v1.22.0/go.mod h1:PlBIdHe0+aUEFn+r2/uthrWq4FxbzugL0L8Li6yQJbc=
github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
-github.com/gofrs/uuid v4.4.0+incompatible h1:3qXRTX8/NbyulANqlc0lchS1gqAVxRgsuW1YrTJupqA=
-github.com/gofrs/uuid v4.4.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
-github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
-github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ=
+github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo=
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
-github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
-github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
-github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk=
-github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
-github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.11.0 h1:6Ewdq3tDic1mg5xRO4milcWCfMVQhI4NkqWWvqejpuA=
-golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio=
-golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
-golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/crypto v0.20.0 h1:jmAMJJZXr5KiCw05dfYK9QnqaqKLYXijU23lsEdcQqg=
golang.org/x/crypto v0.20.0/go.mod h1:Xwo95rrVNIoSMx9wa1JroENMToLWn3RNVrTBpLHgZPQ=
-golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220405052023-b1e9470b6e64/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=
-golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
-golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c=
-golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o=
-golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4=
-golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0=
golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
-golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-lukechampine.com/uint128 v1.1.1/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk=
-modernc.org/cc/v3 v3.36.0/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI=
-modernc.org/ccgo/v3 v3.0.0-20220428102840-41399a37e894/go.mod h1:eI31LL8EwEBKPpNpA4bU1/i+sKOwOrQy8D87zWUcRZc=
-modernc.org/ccgo/v3 v3.0.0-20220430103911-bc99d88307be/go.mod h1:bwdAnOoaIt8Ax9YdWGjxWsdkPcZyRPHqrOvJxaKAKGw=
-modernc.org/ccgo/v3 v3.16.4/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ=
-modernc.org/ccgo/v3 v3.16.6/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ=
-modernc.org/ccgo/v3 v3.16.8/go.mod h1:zNjwkizS+fIFDrDjIAgBSCLkWbJuHF+ar3QRn+Z9aws=
-modernc.org/ccorpus v1.11.6/go.mod h1:2gEUTrWqdpH2pXsmTM1ZkjeSrUWDpjMu2T6m29L/ErQ=
-modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM=
-modernc.org/libc v0.0.0-20220428101251-2d5f3daf273b/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA=
-modernc.org/libc v1.16.0/go.mod h1:N4LD6DBE9cf+Dzf9buBlzVJndKr/iJHG97vGLHYnb5A=
-modernc.org/libc v1.16.1/go.mod h1:JjJE0eu4yeK7tab2n4S1w8tlWd9MxXLRzheaRnAKymU=
-modernc.org/libc v1.16.17/go.mod h1:hYIV5VZczAmGZAnG15Vdngn5HSF5cSkbvfz2B7GRuVU=
-modernc.org/libc v1.16.19 h1:S8flPn5ZeXx6iw/8yNa986hwTQDrY8RXU7tObZuAozo=
-modernc.org/libc v1.16.19/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA=
modernc.org/libc v1.37.6 h1:orZH3c5wmhIQFTXF+Nt+eeauyd+ZIt2BX6ARe+kD+aw=
modernc.org/libc v1.37.6/go.mod h1:YAXkAZ8ktnkCKaN9sw/UDeUVkGYJ/YquGO4FTi5nmHE=
-modernc.org/mathutil v1.2.2/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E=
-modernc.org/mathutil v1.4.1 h1:ij3fYGe8zBF4Vu+g0oT7mB06r8sqGWKuJu1yXeR4by8=
-modernc.org/mathutil v1.4.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E=
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
-modernc.org/memory v1.1.1 h1:bDOL0DIDLQv7bWhP3gMvIrnoFw+Eo6F7a2QK9HPDiFU=
-modernc.org/memory v1.1.1/go.mod h1:/0wo5ibyrQiaoUoH7f9D8dnglAmILJ5/cxZlRECf+Nw=
modernc.org/memory v1.7.2 h1:Klh90S215mmH8c9gO98QxQFsY+W451E8AnzjoE2ee1E=
modernc.org/memory v1.7.2/go.mod h1:NO4NVCQy0N7ln+T9ngWqOQfi7ley4vpwvARR+Hjw95E=
-modernc.org/opt v0.1.1/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0=
-modernc.org/sqlite v1.18.1 h1:ko32eKt3jf7eqIkCgPAeHMBXw3riNSLhl2f3loEF7o8=
-modernc.org/sqlite v1.18.1/go.mod h1:6ho+Gow7oX5V+OiOQ6Tr4xeqbx13UZ6t+Fw9IRUG4d4=
modernc.org/sqlite v1.28.0 h1:Zx+LyDDmXczNnEQdvPuEfcFVA2ZPyaD7UCZDjef3BHQ=
modernc.org/sqlite v1.28.0/go.mod h1:Qxpazz0zH8Z1xCFyi5GSL3FzbtZ3fvbjmywNogldEW0=
-modernc.org/strutil v1.1.1/go.mod h1:DE+MQQ/hjKBZS2zNInV5hhcipt5rLPWkmpbGeW5mmdw=
-modernc.org/tcl v1.13.1/go.mod h1:XOLfOwzhkljL4itZkK6T72ckMgvj0BDsnKNdZVUOecw=
-modernc.org/token v1.0.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
-modernc.org/z v1.5.1/go.mod h1:eWFB510QWW5Th9YGZT81s+LwvaAs3Q2yr4sP0rmLkv8=
diff --git a/harvest.go b/harvest.go
index 59fdc8e3..115120c9 100644
--- a/harvest.go
+++ b/harvest.go
@@ -77,22 +77,14 @@ func Harvest(cfg *Config, fName string, debug bool) error {
t0 := time.Now()
iTime, reportProgress := time.Now(), false
//fmt.Printf("DEBUG are we using the RDM REST API? %t\n", (cfg.InvenioDbHost == ""))
- if cfg.InvenioDbHost == "" {
+ connStr := cfg.MakeDSN()
+ if connStr == "" {
+ fmt.Printf("WARNING: harvesting through JSON API, extremely slow")
cfg.rl = new(RateLimit)
} else {
cfg.rl = nil
// Need to open our Postgres connection and defer the closing of it.
if cfg.pgDB == nil {
- sslmode := "?sslmode=require"
- if strings.HasPrefix(cfg.InvenioDbHost, "localhost") {
- sslmode = "?sslmode=disable"
- }
- connStr := fmt.Sprintf("postgres://%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- if cfg.InvenioDbPassword != "" {
- connStr = fmt.Sprintf("postgres://%s:%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbPassword, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- }
db, err := sql.Open("postgres", connStr)
if err != nil {
return err
@@ -137,7 +129,7 @@ func Harvest(cfg *Config, fName string, debug bool) error {
}
// NOTE: We need to respect rate limits of RDM API if we're using it!
if cfg.rl != nil {
- fmt.Println("DEBUG we are throttling API access")
+ fmt.Println("WARNING: You we are throttling for the JSON API access. It should use directory Postgres access!")
cfg.rl.Throttle(i, tot)
}
}
diff --git a/index.html b/index.html
index d16fd0fc..223ff58f 100644
--- a/index.html
+++ b/index.html
@@ -84,7 +84,7 @@ Requirements
To building the Go based software and documentation
git
-Go >= 1.20.7
+Go >= 1.22.1
Make (e.g. GNU Make)
Pandoc >= 3
diff --git a/installer.sh b/installer.sh
index 7bfde9b9..dd7a5bd4 100755
--- a/installer.sh
+++ b/installer.sh
@@ -4,7 +4,7 @@
# Set the package name and version to install
#
PACKAGE="irdmtools"
-VERSION="0.0.76"
+VERSION="0.0.78"
GIT_GROUP="caltechlibrary"
RELEASE="https://github.com/$GIT_GROUP/$PACKAGE/releases/tag/v$VERSION"
diff --git a/invenio_api.go b/invenio_api.go
index 6d80c976..3766f4a5 100644
--- a/invenio_api.go
+++ b/invenio_api.go
@@ -529,17 +529,7 @@ func Query(cfg *Config, q string, sort string) ([]map[string]interface{}, error)
// getRecordIdsFromPg will return all record ids found by querying Invenio RDM's Postgres
// database.
func getRecordIdsFromPg(cfg *Config) ([]string, error) {
-
- sslmode := "?sslmode=require"
- if strings.HasPrefix(cfg.InvenioDbHost, "localhost") {
- sslmode = "?sslmode=disable"
- }
- connStr := fmt.Sprintf("postgres://%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- if cfg.InvenioDbPassword != "" {
- connStr = fmt.Sprintf("postgres://%s:%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbPassword, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- }
+ connStr := cfg.MakeDSN()
db, err := sql.Open("postgres", connStr)
if err != nil {
return nil, err
@@ -569,17 +559,7 @@ WHERE json->'access'->>'record' = 'public'
// getRecordStaleIdsFromPg will return all record ids found by querying Invenio RDM's Postgres
// database.
func getRecordStaleIdsFromPg(cfg *Config) ([]string, error) {
-
- sslmode := "?sslmode=require"
- if strings.HasPrefix(cfg.InvenioDbHost, "localhost") {
- sslmode = "?sslmode=disable"
- }
- connStr := fmt.Sprintf("postgres://%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- if cfg.InvenioDbPassword != "" {
- connStr = fmt.Sprintf("postgres://%s:%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbPassword, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- }
+ connStr := cfg.MakeDSN()
db, err := sql.Open("postgres", connStr)
if err != nil {
return nil, err
@@ -605,19 +585,11 @@ WHERE json->'access'->>'record' = 'public'
err = rows.Err()
return keys, err
}
+
// getModifiedRecordIdsFromPg will return of record ids found in date range by querying
// Invenio RDM's Postgres database.
func getModifiedRecordIdsFromPg(cfg *Config, startDate string, endDate string) ([]string, error) {
- sslmode := "?sslmode=require"
- if strings.HasPrefix(cfg.InvenioDbHost, "localhost") {
- sslmode = "?sslmode=disable"
- }
- connStr := fmt.Sprintf("postgres://%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- if cfg.InvenioDbPassword != "" {
- connStr = fmt.Sprintf("postgres://%s:%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbPassword, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- }
+ connStr := cfg.MakeDSN()
db, err := sql.Open("postgres", connStr)
if err != nil {
return nil, err
@@ -668,7 +640,8 @@ WHERE json->'access'->>'record' = 'public' AND (updated between $1 AND $2)`
// NOTE: This method relies on OAI-PMH, this is a rate limited process
// so results can take quiet some time.
func GetRecordIds(cfg *Config) ([]string, error) {
- if cfg.InvenioDbHost != "" && cfg.InvenioDbUser != "" {
+ connStr := cfg.MakeDSN()
+ if connStr != "" {
return getRecordIdsFromPg(cfg)
}
return nil, fmt.Errorf("requires direct Postgres access")
@@ -747,7 +720,8 @@ func GetRecordIds(cfg *Config) ([]string, error) {
// NOTE: This method relies on OAI-PMH, this is a rate limited process
// so results can take quiet some time.
func GetRecordStaleIds(cfg *Config) ([]string, error) {
- if cfg.InvenioDbHost != "" && cfg.InvenioDbUser != "" {
+ connStr := cfg.MakeDSN()
+ if connStr != "" {
return getRecordStaleIdsFromPg(cfg)
}
return nil, fmt.Errorf("requires direct Postgres access")
@@ -770,7 +744,8 @@ func GetModifiedRecordIds(cfg *Config, start string, end string) ([]string, erro
if end == "" {
end = time.Now().Format("2006-01-02")
}
- if cfg.InvenioDbHost != "" && cfg.InvenioDbUser != "" {
+ connStr := cfg.MakeDSN()
+ if connStr != "" {
return getModifiedRecordIdsFromPg(cfg, start, end)
}
return nil, fmt.Errorf("database access to Postgres not configured")
@@ -884,23 +859,14 @@ func getRecordFromPg(cfg *Config, rdmID string, draft bool) (*simplified.Record,
)
if cfg.pgDB != nil {
db = cfg.pgDB
- } else if cfg.pgDB == nil && cfg.InvenioDbHost != "" && cfg.InvenioDbUser != ""{
- sslmode := "?sslmode=require"
- if strings.HasPrefix(cfg.InvenioDbHost, "localhost") {
- sslmode = "?sslmode=disable"
- }
- connStr := fmt.Sprintf("postgres://%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- if cfg.InvenioDbPassword != "" {
- connStr = fmt.Sprintf("postgres://%s:%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbPassword, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- }
+ } else if cfg.pgDB == nil {
+ connStr := cfg.MakeDSN()
db, err = sql.Open("postgres", connStr)
if err != nil {
return nil, err
}
defer db.Close()
- }
+ }
stmt := `SELECT jsonb_strip_nulls(jsonb_build_object(
'created', created::timestamp (0) with time zone,
'updated', updated::timestamp (0) with time zone,
@@ -1011,23 +977,14 @@ func getRecordVersionsFromPg(cfg *Config, rdmID string) ([]*map[string]interface
)
if cfg.pgDB != nil {
db = cfg.pgDB
- } else if cfg.pgDB == nil && cfg.InvenioDbHost != "" && cfg.InvenioDbUser != ""{
- sslmode := "?sslmode=require"
- if strings.HasPrefix(cfg.InvenioDbHost, "localhost") {
- sslmode = "?sslmode=disable"
- }
- connStr := fmt.Sprintf("postgres://%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- if cfg.InvenioDbPassword != "" {
- connStr = fmt.Sprintf("postgres://%s:%s@%s/%s%s",
- cfg.InvenioDbUser, cfg.InvenioDbPassword, cfg.InvenioDbHost, cfg.RepoID, sslmode)
- }
+ } else if cfg.pgDB == nil {
+ connStr := cfg.MakeDSN()
db, err = sql.Open("postgres", connStr)
if err != nil {
return nil, err
}
defer db.Close()
- }
+ }
stmt := `SELECT jsonb_strip_nulls(jsonb_build_object(
'id', json->>'id',
'metadata', json,
@@ -1086,7 +1043,8 @@ ORDER BY version_id`
// }
// ```
func GetRecord(cfg *Config, id string, draft bool) (*simplified.Record, error) {
- if cfg.InvenioDbHost != "" && cfg.InvenioDbUser != "" {
+ connstr := cfg.MakeDSN()
+ if connstr != "" {
return getRecordFromPg(cfg, id, draft)
}
return nil, fmt.Errorf("database access to Postgres not configured")
@@ -1128,7 +1086,8 @@ func GetRecord(cfg *Config, id string, draft bool) (*simplified.Record, error) {
// }
// ```
func GetRecordVersions(cfg *Config, id string) ([]*map[string]interface{}, error) {
- if cfg.InvenioDbHost != "" && cfg.InvenioDbUser != "" {
+ connstr := cfg.MakeDSN()
+ if connstr != "" {
return getRecordVersionsFromPg(cfg, id)
}
return nil, fmt.Errorf("requires Postgres access support")
diff --git a/invenio_api_test.go b/invenio_api_test.go
index 871fbb9f..a642a698 100644
--- a/invenio_api_test.go
+++ b/invenio_api_test.go
@@ -76,13 +76,22 @@ func Test01Config(t *testing.T) {
t.Errorf("tests are not configured")
t.FailNow()
}
+ if err := cfg.LoadEnv("TEST_"); err != nil {
+ t.Error(err)
+ }
+ // For Go tooling, all Invenio RDM access goes through the DB.
+ /*
if cfg.InvenioAPI != "" && cfg.InvenioToken == "" {
t.Errorf("missing an Invenio API Token")
- } else if cfg.InvenioAPI == "" && cfg.InvenioDbHost == "" {
- t.Errorf("expected either Ivenio API or Db Host to be set")
+ }
+ */
+ if cfg.InvenioDbHost == "" && cfg.InvenioDSN == "" {
+ src, _ := JSONMarshalIndent(cfg, "", " ")
+ t.Errorf("expected either Invenio Db Host or DSN to be set via TEST_* environment variables, %s", src)
}
}
+/* NOTE: I am abandoning the Invenio RDM API in favor of direct Postgres access due to rate limiting challenges.
func Test01Query(t *testing.T) {
if useQuery == "" {
useQuery = "gravity"
@@ -116,6 +125,7 @@ func Test01Query(t *testing.T) {
}
}
}
+*/
func Test01GetModifiedRecordIds(t *testing.T) {
if cfg == nil {
@@ -170,6 +180,10 @@ func Test02GetRecord(t *testing.T) {
if cfg == nil || idsFName == "" {
t.Skipf("Not configured for testing")
}
+ cfg := new(Config)
+ if err := cfg.LoadEnv("TEST_"); err != nil {
+ t.Error(err)
+ }
src, err := os.ReadFile(idsFName)
if err != nil {
t.Errorf("failed to read ids from file %q, %s", idsFName, err)
@@ -181,18 +195,23 @@ func Test02GetRecord(t *testing.T) {
t.FailNow()
}
/*
- // Randomize the order of the ids before running GetRecord test.
- rand.Shuffle(len(ids), func(i int, j int) {
- ids[i], ids[j] = ids[j], ids[i]
- })
+ // Randomize the order of the ids before running GetRecord test.
+ rand.Shuffle(len(ids), func(i int, j int) {
+ ids[i], ids[j] = ids[j], ids[i]
+ })
*/
+ t0 := time.Now()
+ iTime := time.Now()
+ tot := len(ids)
+ reportProgress := false
for i, id := range ids {
_, err := GetRecord(cfg, id, false)
if err != nil {
t.Errorf("(%d) GetRecord(cfg, %q, false) %s\n%s", i, id, err, cfg.rl)
t.FailNow()
}
- //cfg.rl.Fprintf(os.Stderr)
- cfg.rl.Throttle(i, len(ids))
+ if iTime, reportProgress = CheckWaitInterval(iTime, (15 * time.Second)); reportProgress || i == 0 {
+ fmt.Fprintf(os.Stderr, "%s %s\n", ProgressIPS(t0, i, time.Second), ProgressETA(t0, i, tot))
+ }
}
}
diff --git a/main_test.go b/main_test.go
index 07f16304..ea74722c 100644
--- a/main_test.go
+++ b/main_test.go
@@ -77,12 +77,14 @@ func TestMain(m *testing.M) {
l.Fatal(err)
}
cfg.Debug = true
+ /* InvenioAPI and Token no longer needed when switching to direct DB access
if cfg.InvenioAPI == "" {
l.Printf("invenio api not configured")
}
if cfg.InvenioToken == "" {
l.Printf("invenio troken not configured")
}
+ */
if idsFName == "" {
idsFName = path.Join("testdata", "test_record_ids.json")
}
diff --git a/pagefind/fragment/unknown_21bfbe7.pf_fragment b/pagefind/fragment/unknown_21bfbe7.pf_fragment
new file mode 100644
index 00000000..8a8f2cba
Binary files /dev/null and b/pagefind/fragment/unknown_21bfbe7.pf_fragment differ
diff --git a/pagefind/fragment/unknown_21f8fea.pf_fragment b/pagefind/fragment/unknown_21f8fea.pf_fragment
new file mode 100644
index 00000000..34562bfb
Binary files /dev/null and b/pagefind/fragment/unknown_21f8fea.pf_fragment differ
diff --git a/pagefind/fragment/unknown_2cc1905.pf_fragment b/pagefind/fragment/unknown_2cc1905.pf_fragment
new file mode 100644
index 00000000..f220a424
Binary files /dev/null and b/pagefind/fragment/unknown_2cc1905.pf_fragment differ
diff --git a/pagefind/fragment/unknown_2fecbe1.pf_fragment b/pagefind/fragment/unknown_2fecbe1.pf_fragment
new file mode 100644
index 00000000..8554a94a
Binary files /dev/null and b/pagefind/fragment/unknown_2fecbe1.pf_fragment differ
diff --git a/pagefind/fragment/unknown_5246635.pf_fragment b/pagefind/fragment/unknown_5246635.pf_fragment
new file mode 100644
index 00000000..19dd600f
Binary files /dev/null and b/pagefind/fragment/unknown_5246635.pf_fragment differ
diff --git a/pagefind/fragment/unknown_5bd22a9.pf_fragment b/pagefind/fragment/unknown_5bd22a9.pf_fragment
new file mode 100644
index 00000000..5d347dd6
Binary files /dev/null and b/pagefind/fragment/unknown_5bd22a9.pf_fragment differ
diff --git a/pagefind/fragment/unknown_5f661fe.pf_fragment b/pagefind/fragment/unknown_5f661fe.pf_fragment
new file mode 100644
index 00000000..48148129
Binary files /dev/null and b/pagefind/fragment/unknown_5f661fe.pf_fragment differ
diff --git a/pagefind/fragment/unknown_84908d8.pf_fragment b/pagefind/fragment/unknown_84908d8.pf_fragment
new file mode 100644
index 00000000..ad070771
Binary files /dev/null and b/pagefind/fragment/unknown_84908d8.pf_fragment differ
diff --git a/pagefind/fragment/unknown_a7db92a.pf_fragment b/pagefind/fragment/unknown_a7db92a.pf_fragment
new file mode 100644
index 00000000..7f8b2cd4
Binary files /dev/null and b/pagefind/fragment/unknown_a7db92a.pf_fragment differ
diff --git a/pagefind/fragment/unknown_aec870b.pf_fragment b/pagefind/fragment/unknown_aec870b.pf_fragment
new file mode 100644
index 00000000..82aa77ef
Binary files /dev/null and b/pagefind/fragment/unknown_aec870b.pf_fragment differ
diff --git a/pagefind/index/unknown_48212ea.pf_index b/pagefind/index/unknown_48212ea.pf_index
new file mode 100644
index 00000000..e9561a9d
Binary files /dev/null and b/pagefind/index/unknown_48212ea.pf_index differ
diff --git a/pagefind/index/unknown_8bacf8c.pf_index b/pagefind/index/unknown_8bacf8c.pf_index
new file mode 100644
index 00000000..4389babb
Binary files /dev/null and b/pagefind/index/unknown_8bacf8c.pf_index differ
diff --git a/pagefind/index/unknown_de203cc.pf_index b/pagefind/index/unknown_de203cc.pf_index
new file mode 100644
index 00000000..5401ec89
Binary files /dev/null and b/pagefind/index/unknown_de203cc.pf_index differ
diff --git a/pagefind/pagefind-entry.json b/pagefind/pagefind-entry.json
index 049228a1..bef168ec 100644
--- a/pagefind/pagefind-entry.json
+++ b/pagefind/pagefind-entry.json
@@ -1 +1 @@
-{"version":"1.0.4","languages":{"unknown":{"hash":"unknown_807dd15bc844e37","wasm":null,"page_count":19}}}
\ No newline at end of file
+{"version":"1.0.4","languages":{"unknown":{"hash":"unknown_b0e08a43a04b12d","wasm":null,"page_count":20}}}
\ No newline at end of file
diff --git a/pagefind/pagefind.unknown_af7558a281e6d66.pf_meta b/pagefind/pagefind.unknown_af7558a281e6d66.pf_meta
new file mode 100644
index 00000000..2879a713
Binary files /dev/null and b/pagefind/pagefind.unknown_af7558a281e6d66.pf_meta differ
diff --git a/pagefind/pagefind.unknown_b0e08a43a04b12d.pf_meta b/pagefind/pagefind.unknown_b0e08a43a04b12d.pf_meta
new file mode 100644
index 00000000..f069af20
Binary files /dev/null and b/pagefind/pagefind.unknown_b0e08a43a04b12d.pf_meta differ
diff --git a/pagefind/pagefind.unknown_d4add029845945c.pf_meta b/pagefind/pagefind.unknown_d4add029845945c.pf_meta
new file mode 100644
index 00000000..2b5326dd
Binary files /dev/null and b/pagefind/pagefind.unknown_d4add029845945c.pf_meta differ
diff --git a/pagefind/pagefind.unknown_f7c9e259e4e5ba5.pf_meta b/pagefind/pagefind.unknown_f7c9e259e4e5ba5.pf_meta
new file mode 100644
index 00000000..219c770e
Binary files /dev/null and b/pagefind/pagefind.unknown_f7c9e259e4e5ba5.pf_meta differ
diff --git a/people2vocabulary.1.md b/people2vocabulary.1.md
index bfa0ebfa..e60a990b 100644
--- a/people2vocabulary.1.md
+++ b/people2vocabulary.1.md
@@ -1,6 +1,6 @@
-%people2vocabulary(1) irdmtools user manual | version 0.0.76 1203a801
+%people2vocabulary(1) irdmtools user manual | version 0.0.78 95ba3295
% R. S. Doiel
-% 2024-03-28
+% 2024-04-12
# NAME
diff --git a/progress.go b/progress.go
index 1fdd51c8..f0b21d96 100644
--- a/progress.go
+++ b/progress.go
@@ -1,8 +1,8 @@
package irdmtools
import (
- "fmt"
- "time"
+ "fmt"
+ "time"
)
// CheckWaitInterval checks to see if an interval of time has been met or exceeded.
@@ -15,20 +15,20 @@ import (
// iTime := time.Now()
// reportProgress := false
//
-// for i, key := range records {
-// // ... process stuff ...
-// if iTime, reportProgress = CheckWaitInterval(rptTime, (30 * time.Second)); reportProgress {
-// log.Printf("%s", ProgressETA(t0, i, tot))
-// }
-// }
+// for i, key := range records {
+// // ... process stuff ...
+// if iTime, reportProgress = CheckWaitInterval(iTime, (30 * time.Second)); reportProgress {
+// log.Printf("%s", ProgressETA(t0, i, tot))
+// }
+// }
//
// ```
func CheckWaitInterval(iTime time.Time, wait time.Duration) (time.Time, bool) {
- if time.Since(iTime) >= wait {
- iTime = time.Now()
- return iTime, true
- }
- return iTime, false
+ if time.Since(iTime) >= wait {
+ iTime = time.Now()
+ return iTime, true
+ }
+ return iTime, false
}
// ProgressETA returns a string with the percentage processed and estimated time remaining.
@@ -40,25 +40,25 @@ func CheckWaitInterval(iTime time.Time, wait time.Duration) (time.Time, bool) {
// iTime := time.Now()
// reportProgress := false
//
-// for i, key := range records {
-// // ... process stuff ...
-// if iTime, reportProgress = CheckWaitInterval(rptTime, (30 * time.Second)); reportProgress {
-// log.Printf("%s", ProgressETA(t0, i, tot))
-// }
-// }
+// for i, key := range records {
+// // ... process stuff ...
+// if iTime, reportProgress = CheckWaitInterval(iTime, (30 * time.Second)); reportProgress {
+// log.Printf("%s", ProgressETA(t0, i, tot))
+// }
+// }
//
// ```
func ProgressETA(t0 time.Time, i int, tot int) string {
- if i == 0 {
- return fmt.Sprintf("%.2f%% ETA unknown", 0.0)
- }
- // percent completed
- percent := (float64(i) / float64(tot)) * 100.0
- // running time
- rt := time.Since(t0)
- // estimated time remaining
- eta := time.Duration((float64(rt) / float64(i) * float64(tot)) - float64(rt))
- return fmt.Sprintf("%.2f%% ETA %v", percent, eta.Round(time.Second))
+ if i == 0 {
+ return fmt.Sprintf("%.2f%% ETA unknown", 0.0)
+ }
+ // percent completed
+ percent := (float64(i) / float64(tot)) * 100.0
+ // running time
+ rt := time.Since(t0)
+ // estimated time remaining
+ eta := time.Duration((float64(rt) / float64(i) * float64(tot)) - float64(rt))
+ return fmt.Sprintf("%.2f%% ETA %v", percent, eta.Round(time.Second))
}
// ProgressIPS returns a string with the elapsed time and increments per second.
@@ -69,18 +69,18 @@ func ProgressETA(t0 time.Time, i int, tot int) string {
// iTime := time.Now()
// reportProgress := false
//
-// for i, key := range records {
-// // ... process stuff ...
-// if iTime, reportProgress = CheckWaitInterval(iTime, (30 * time.Second)); reportProgress || i = 0 {
-// log.Printf("%s", ProgressIPS(t0, i, time.Second))
-// }
-// }
+// for i, key := range records {
+// // ... process stuff ...
+// if iTime, reportProgress = CheckWaitInterval(iTime, (30 * time.Second)); reportProgress || i == 0 {
+// log.Printf("%s", ProgressIPS(t0, i, time.Second))
+// }
+// }
//
// ```
func ProgressIPS(t0 time.Time, i int, timeUnit time.Duration) string {
- if i == 0 {
- return fmt.Sprintf("(%d/%s) IPS unknown", i, time.Since(t0).Round(timeUnit))
- }
- ips := float64(i) / float64(time.Since(t0).Seconds())
- return fmt.Sprintf("(%d/%s) IPS %.2f i/sec.", i, time.Since(t0).Round(timeUnit), ips)
+ if i == 0 {
+ return fmt.Sprintf("(%d/%s) IPS unknown", i, time.Since(t0).Round(timeUnit))
+ }
+ ips := float64(i) / float64(time.Since(t0).Seconds())
+ return fmt.Sprintf("(%d/%s) IPS %.2f i/sec.", i, time.Since(t0).Round(timeUnit), ips)
}
diff --git a/rdm2eprint.1.html b/rdm2eprint.1.html
index ee8494f2..4afb4631 100644
--- a/rdm2eprint.1.html
+++ b/rdm2eprint.1.html
@@ -72,6 +72,10 @@ OPTIONS
read from standard input and write crosswalk to standard out.
+-latest
+
+only convert record(s) if latest version.
+
EXAMPLE
Example generating a EPRINT JSON document from RDM would use the
diff --git a/rdm2eprint.1.md b/rdm2eprint.1.md
index 4e2bfb0d..c05aac07 100644
--- a/rdm2eprint.1.md
+++ b/rdm2eprint.1.md
@@ -1,6 +1,6 @@
-%rdm2eprint(1) irdmtools user manual | version 0.0.76 1203a801
+%rdm2eprint(1) irdmtools user manual | version 0.0.78 95ba3295
% R. S. Doiel and Tom Morrell
-% 2024-03-28
+% 2024-04-12
# NAME
@@ -48,6 +48,9 @@ specified by C_NAME.
-pipeline
: read from standard input and write crosswalk to standard out.
+-latest
+: only convert record(s) if latest version.
+
# EXAMPLE
Example generating a EPRINT JSON document from RDM would use the following
diff --git a/rdm2eprint.go b/rdm2eprint.go
index 6cdb9671..9bb3e19e 100644
--- a/rdm2eprint.go
+++ b/rdm2eprint.go
@@ -40,6 +40,7 @@ import (
"fmt"
"io"
"log"
+ "os"
"strconv"
"strings"
"time"
@@ -640,7 +641,7 @@ func usePostgresDB(cfg *Config) bool {
return false
}
-func (app *Rdm2EPrint) Run(in io.Reader, out io.Writer, eout io.Writer, rdmids []string, asXML bool) error {
+func (app *Rdm2EPrint) Run(in io.Reader, out io.Writer, eout io.Writer, rdmids []string, asXML bool, latestVersions bool) error {
eprints := new(eprinttools.EPrints)
if usePostgresDB(app.Cfg) {
cfg := app.Cfg
@@ -666,6 +667,17 @@ func (app *Rdm2EPrint) Run(in io.Reader, out io.Writer, eout io.Writer, rdmids [
if err != nil {
return err
}
+ if latestVersions {
+ fmt.Fprintf(os.Stderr, "DEBUG check src->'versions'->'is_latests' before migrating %+v\n", rec.Versions)
+ }
+ if latestVersions {
+ if rec.Versions == nil {
+ continue
+ }
+ if ! rec.Versions.IsLatest {
+ continue
+ }
+ }
eprint := new(eprinttools.EPrint)
if err := CrosswalkRdmToEPrint(app.Cfg, rec, eprint); err != nil {
return err
@@ -688,7 +700,7 @@ func (app *Rdm2EPrint) Run(in io.Reader, out io.Writer, eout io.Writer, rdmids [
return nil
}
-func (app *Rdm2EPrint) RunHarvest(in io.Reader, out io.Writer, eout io.Writer, cName string, rdmids []string) error {
+func (app *Rdm2EPrint) RunHarvest(in io.Reader, out io.Writer, eout io.Writer, cName string, rdmids []string, latestVersions bool) error {
if len(rdmids) == 0 {
return fmt.Errorf("no RDM ids to process")
}
@@ -728,6 +740,14 @@ func (app *Rdm2EPrint) RunHarvest(in io.Reader, out io.Writer, eout io.Writer, c
log.Printf("Aborting, failed to get record (%d) %s, %s", i, rdmid, err)
return err
}
+ if latestVersions {
+ if rec.Versions == nil {
+ continue
+ }
+ if ! rec.Versions.IsLatest {
+ continue
+ }
+ }
eprint := new(eprinttools.EPrint)
if err := CrosswalkRdmToEPrint(app.Cfg, rec, eprint); err != nil {
log.Printf("Aborting, failed to crosswalk record (%d) %s, %s", i, rdmid, err)
@@ -759,7 +779,7 @@ func (app *Rdm2EPrint) RunHarvest(in io.Reader, out io.Writer, eout io.Writer, c
// Run in pipline mode, e.g. `eprint2rdm XXXXX-XXXXX | rdm2eprint` should round trip the EPrint record
// to RDM then back again. It reads from standard input and writes to standard out.
-func (app *Rdm2EPrint) RunPipeline(in io.Reader, out io.Writer, eout io.Writer, asXML bool) error {
+func (app *Rdm2EPrint) RunPipeline(in io.Reader, out io.Writer, eout io.Writer, asXML bool, latestVersions bool) error {
eprint := new(eprinttools.EPrint)
eprints := new(eprinttools.EPrints)
rec := new(simplified.Record)
@@ -770,6 +790,16 @@ func (app *Rdm2EPrint) RunPipeline(in io.Reader, out io.Writer, eout io.Writer,
if err := JSONUnmarshal(src, &rec); err != nil {
return err
}
+ if latestVersions {
+ if rec.Versions == nil {
+ fmt.Fprintf(out, "\n")
+ return nil
+ }
+ if ! rec.Versions.IsLatest {
+ fmt.Fprintf(out, "\n")
+ return nil
+ }
+ }
if err := CrosswalkRdmToEPrint(app.Cfg, rec, eprint); err != nil {
return err
}
diff --git a/rdmutil.1.html b/rdmutil.1.html
index 1d901135..42227a10 100644
--- a/rdmutil.1.html
+++ b/rdmutil.1.html
@@ -30,23 +30,9 @@
SYNOPSIS
…]
DESCRIPTION
rdmutil is way of interacting with Invenio-RDM
-through its Postgres database (or a copy of that database) or web API
-(JSON and OAI-PMH). It was inspired the RDM JSON API but was implemented
-as a way of quickly processing data without the hinderance of rate
-limits (needed when RDM is public facing). If Postgres access is
-configured then it’ll use SQL to retrieve the JSON data. It still uses
-the JSON API for update operations. rdmutil uses environment variables
-for configuration. For accessing the JSON API it uses the following.
-
-RDM_URL
-
-the URL of the Invenio RDM API and OAI-PMH services
-
-RDMTOK
-
-the token needed to access the Invenio RDM API and OAI-PMH services
-
-
+through its Postgres database. It does NOT use the OAI-PMH API since
+that is far too slow. rdmutil uses environment variables for
+configuration. For accessing the JSON API it uses the following.
dataset related environment variables (i.e. for harvest action)
rdmutil environment variable for storing harvested
content.
@@ -118,10 +104,7 @@ ACTION
get_all_ids
Returns a list of all repository record ids latest versions. The method
-uses OAI-PMH for id retrieval is Postgress access is not configure. That
-can be terribly slow and rate limited. A test instance took 11 minutes
-to retrieve 24000 record ids. If direct Postgres access is setup it
-queries the database directly (much much faster).
+requires Postgres database access.
get_all_stale_ids
diff --git a/rdmutil.1.md b/rdmutil.1.md
index d5ba0839..d122c8f1 100644
--- a/rdmutil.1.md
+++ b/rdmutil.1.md
@@ -1,6 +1,6 @@
-%rdmutil(1) irdmtools user manual | version 0.0.76 1203a801
+%rdmutil(1) irdmtools user manual | version 0.0.78 95ba3295
% R. S. Doiel and Tom Morrell
-% 2024-03-28
+% 2024-04-12
# NAME
@@ -13,19 +13,9 @@ rdmutil [OPTIONS] ACTION [ACTION_PARAMETERS ...]
# DESCRIPTION
__rdmutil__ is way of interacting with Invenio-RDM through its Postgres
-database (or a copy of that database) or web API (JSON and OAI-PMH).
-It was inspired the RDM JSON API but was implemented as a way of quickly
-processing data without the hinderance of rate limits (needed when RDM is
-public facing). If Postgres access is configured then it'll use SQL to
-retrieve the JSON data. It still uses the JSON
-API for update operations. rdmutil uses environment variables for
-configuration. For accessing the JSON API it uses the following.
-
-RDM_URL
-: the URL of the Invenio RDM API and OAI-PMH services
-
-RDMTOK
-: the token needed to access the Invenio RDM API and OAI-PMH services
+database. It does NOT use the OAI-PMH API since that is far too slow.
+rdmutil uses environment variables for configuration. For accessing the
+JSON API it uses the following.
dataset related environment variables (i.e. for harvest action)
@@ -82,11 +72,8 @@ for id retrieval. It is rate limited. Start and end dates are inclusive
and should be specific in YYYY-MM-DD format.
get_all_ids
-: Returns a list of all repository record ids latest versions. The method uses
-OAI-PMH for id retrieval is Postgress access is not configure. That can be
-terribly slow and rate limited. A test instance took 11 minutes to retrieve
-24000 record ids. If direct Postgres access is setup it queries the database
-directly (much much faster).
+: Returns a list of all repository record ids latest versions. The method
+requires Postgres database access.
get_all_stale_ids
: Returns a list of public record ids that are NOT the latest version of the
diff --git a/version.go b/version.go
index 581c7c0e..936272b0 100644
--- a/version.go
+++ b/version.go
@@ -6,13 +6,13 @@ import (
const (
// Version number of release
- Version = "0.0.76"
+ Version = "0.0.78"
// ReleaseDate, the date version.go was generated
- ReleaseDate = "2024-03-28"
+ ReleaseDate = "2024-04-12"
// ReleaseHash, the Git hash when version.go was generated
- ReleaseHash = "1203a801"
+ ReleaseHash = "95ba3295"
LicenseText = `
Redistribution and use in source and binary forms, with or without