Skip to content

Commit

Permalink
feat: ep3ds2citation aggregated eprint content
Browse files Browse the repository at this point in the history
  • Loading branch information
rsdoiel committed Mar 15, 2024
1 parent 6139a5b commit 4169bca
Show file tree
Hide file tree
Showing 11 changed files with 43 additions and 75 deletions.
44 changes: 2 additions & 42 deletions cmd/ep3ds2citations/ep3ds2citations.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ import (

// Caltech Library packages
"github.com/caltechlibrary/irdmtools"

// 3rd Party Packages
"gopkg.in/yaml.v3"
)

var (
Expand Down Expand Up @@ -98,12 +95,6 @@ objects will be written.
-host
: Set the base url to use for the records (e.g. authors.library.caltech.edu)
-resource-types
: Use YAML file to map resouce types
-contributor-types
: Use YAML file to map contributor types
# EXAMPLE
Example of a dataset collection called "authors.ds" of EPrint records
Expand Down Expand Up @@ -180,15 +171,13 @@ func main() {
fmtHelp := irdmtools.FmtHelp

showHelp, showVersion, showLicense := false, false, false
idsFName, keysFName, repoHost, resourceFName, contributorFName := "", "-", "", "", ""
idsFName, keysFName, repoHost := "", "-", ""
flag.BoolVar(&showHelp, "help", false, "display help")
flag.BoolVar(&showVersion, "version", false, "display version")
flag.BoolVar(&showLicense, "license", false, "display license")
flag.StringVar(&idsFName, "ids", idsFName, "read ids from a file")
flag.StringVar(&keysFName, "keys", keysFName, "read keys from a file or standard input")
flag.StringVar(&repoHost, "host", repoHost, "repository hostname")
flag.StringVar(&resourceFName, "resource-types", resourceFName, "resource types map in YAML")
flag.StringVar(&contributorFName, "contributor-types", contributorFName, "contributor types map in YAML")

flag.Parse()
args := flag.Args()
Expand Down Expand Up @@ -227,34 +216,5 @@ func main() {
os.Exit(1)
}
}
resourceTypes := map[string]string{}
if resourceFName != "" {
rt, err := os.Open(resourceFName)
if err != nil {
fmt.Fprintf(eout, "failed to open %q, %s\n", resourceFName)
os.Exit(1)
}
defer rt.Close()
decoder := yaml.NewDecoder(rt)
if err := decoder.Decode(resourceTypes); err != nil {
fmt.Fprintf(eout, "failed to parse %q, %s\n", resourceFName, err)
os.Exit(1)
}
}

contributorTypes := map[string]string{}
if contributorFName != "" {
ct, err := os.Open(contributorFName)
if err != nil {
fmt.Fprintf(eout, "failed to open %q, %s\n", contributorFName)
os.Exit(1)
}
defer ct.Close()
decoder := yaml.NewDecoder(ct)
if err := decoder.Decode(contributorTypes); err != nil {
fmt.Fprintf(eout, "failed to parse %q, %s\n", contributorFName, err)
os.Exit(1)
}
}
os.Exit(irdmtools.RunEPrintDSToCitationDS(in, out, eout, args, repoHost, dsIds, resourceTypes, contributorTypes))
os.Exit(irdmtools.RunEPrintDSToCitationDS(in, out, eout, args, repoHost, dsIds))
}
4 changes: 2 additions & 2 deletions doi2rdm.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%doi2rdm(1) irdmtools user manual | version 0.0.71 601fb5b5
%doi2rdm(1) irdmtools user manual | version 0.0.71 6139a5bf
% R. S. Doiel and Tom Morrell
% 2024-03-14
% 2024-03-15

# NAME

Expand Down
10 changes: 2 additions & 8 deletions ep3ds2citations.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%ep3ds2citations(1) irdmtools user manual | version 0.0.71 601fb5b5
%ep3ds2citations(1) irdmtools user manual | version 0.0.71 6139a5bf
% R. S. Doiel and Tom Morrell
% 2024-03-14
% 2024-03-15

# NAME

Expand Down Expand Up @@ -45,12 +45,6 @@ objects will be written.
-host
: Set the base url to use for the records (e.g. authors.library.caltech.edu)

-resource-types
: Use YAML file to map resource types

-contributor-types
: Use YAML file to map contributor types

# EXAMPLE

Example of a dataset collection called "authors.ds" of EPrint records
Expand Down
4 changes: 2 additions & 2 deletions ep3util.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%ep3util(1) irdmtools user manual | version 0.0.71 601fb5b5
%ep3util(1) irdmtools user manual | version 0.0.71 6139a5bf
% R. S. Doiel and Tom Morrell
% 2024-03-14
% 2024-03-15

# NAME

Expand Down
32 changes: 23 additions & 9 deletions eprint2citation.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"log"
"path"
"strings"
"time"

// Caltech Library packages
"github.com/caltechlibrary/dataset/v2"
Expand Down Expand Up @@ -44,7 +45,7 @@ func EPrintToCitation(repoName string, key string, eprint *eprinttools.EPrint, r

// MigrateEPrintDatasetToCitationDataset takes a dataset of EPrint objects and migrates the ones in the
// id list to a citation dataset collection.
func MigrateEPrintDatasetToCitationDataset(ep3CName string, ids []string, repoHost string, resourceTypes map[string]string, contributorTypes map[string]string, citeCName string) error {
func MigrateEPrintDatasetToCitationDataset(ep3CName string, ids []string, repoHost string, citeCName string) error {
ep3, err := dataset.Open(ep3CName)
if err != nil {
return err
Expand All @@ -55,9 +56,13 @@ func MigrateEPrintDatasetToCitationDataset(ep3CName string, ids []string, repoHo
return err
}
defer cite.Close()
resourceTypes := map[string]string{}
contributorTypes := map[string]string{}
tot := len(ids)
log.Printf("%d/%d citations processed", 0, tot)
for i, id := range ids {
start := time.Now()
i := 0
log.Printf("%d/%d citations processed in %s", i, tot, time.Since(start).Truncate(time.Second).String())
for _, id := range ids {
eprint := new(eprinttools.EPrint)
if err := ep3.ReadObject(id, eprint); err != nil {
log.Printf("failed to get %s (%d), %s", id, i, err)
Expand All @@ -67,9 +72,17 @@ func MigrateEPrintDatasetToCitationDataset(ep3CName string, ids []string, repoHo
if repoName == "" {
repoName = path.Base(strings.TrimSuffix(ep3CName, ".ds"))
}
// NOTE: we want to maintain the contributor type and resource type maps in the existing
// EPrints dataset collection. We do that by accruing resourceTypes and contributorTypes from
// the eprint record retrieved.
if _, ok := resourceTypes[eprint.Type]; ! ok {
resourceTypes[eprint.Type] = eprint.Type
}

citation, err := EPrintToCitation(repoName, id, eprint, repoHost, resourceTypes, contributorTypes)
if err != nil {
log.Printf("failed to convert %s (%d) to citation, %s", id, i, err)
log.Printf("failed to convert (%d) id %s from %s to citation, %s", i, id, repoName, err)
continue
}
if cite.HasKey(citation.ID) {
err = cite.UpdateObject(citation.ID, citation)
Expand All @@ -79,17 +92,18 @@ func MigrateEPrintDatasetToCitationDataset(ep3CName string, ids []string, repoHo
if err != nil {
log.Printf("failed to save citation for %s (%d), %s", id, i, err)
}
if (i % 100) == 0 {
log.Printf("%d/%d citations processed", i+1, tot)
i++
if (i % 5000) == 0 {
log.Printf("%d/%d citations processed in %s", i, tot, time.Since(start).Truncate(time.Second).String())
}
}
log.Printf("%d/%d citations processed", tot, tot)
log.Printf("%d/%d citations processed in %s", i, tot, time.Since(start).Truncate(time.Second).String())
return nil
}

// RunEPrintDSToCitationDS migrates contents from an EPrint dataset collection to a citation dataset collection for
// a given list of ids and repository hostname.
func RunEPrintDSToCitationDS(in io.Reader, out io.Writer, eout io.Writer, args []string, repoHost string, ids []string, resourceTypes map[string]string, contributorTypes map[string]string) int {
func RunEPrintDSToCitationDS(in io.Reader, out io.Writer, eout io.Writer, args []string, repoHost string, ids []string) int {
var (
ep3CName string
citeCName string
Expand All @@ -116,7 +130,7 @@ func RunEPrintDSToCitationDS(in io.Reader, out io.Writer, eout io.Writer, args [
fmt.Fprintf(eout, "no ids to process, aborting\n")
return 1
}
if err := MigrateEPrintDatasetToCitationDataset(ep3CName, keys, repoHost, resourceTypes, contributorTypes, citeCName); err != nil {
if err := MigrateEPrintDatasetToCitationDataset(ep3CName, keys, repoHost, citeCName); err != nil {
fmt.Fprintf(eout, "%s\n", err)
return 1
}
Expand Down
4 changes: 2 additions & 2 deletions eprint2rdm.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%eprint2rdm(1) irdmtools user manual | version 0.0.71 601fb5b5
%eprint2rdm(1) irdmtools user manual | version 0.0.71 6139a5bf
% R. S. Doiel and Tom Morrell
% 2024-03-14
% 2024-03-15

# NAME

Expand Down
4 changes: 2 additions & 2 deletions eprintrest.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%eprintrest(1) irdmtools user manual | version 0.0.71 601fb5b5
%eprintrest(1) irdmtools user manual | version 0.0.71 6139a5bf
% R. S. Doiel and Tom Morrell
% 2024-03-14
% 2024-03-15

# NAME

Expand Down
4 changes: 2 additions & 2 deletions people2vocabulary.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%people2vocabulary(1) irdmtools user manual | version 0.0.71 601fb5b5
%people2vocabulary(1) irdmtools user manual | version 0.0.71 6139a5bf
% R. S. Doiel
% 2024-03-14
% 2024-03-15

# NAME

Expand Down
4 changes: 2 additions & 2 deletions rdm2eprint.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%rdm2eprint(1) irdmtools user manual | version 0.0.71 601fb5b5
%rdm2eprint(1) irdmtools user manual | version 0.0.71 6139a5bf
% R. S. Doiel and Tom Morrell
% 2024-03-14
% 2024-03-15

# NAME

Expand Down
4 changes: 2 additions & 2 deletions rdmutil.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%rdmutil(1) irdmtools user manual | version 0.0.71 601fb5b5
%rdmutil(1) irdmtools user manual | version 0.0.71 6139a5bf
% R. S. Doiel and Tom Morrell
% 2024-03-14
% 2024-03-15

# NAME

Expand Down
4 changes: 2 additions & 2 deletions version.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ const (
Version = "0.0.71"

// ReleaseDate, the date version.go was generated
ReleaseDate = "2024-03-14"
ReleaseDate = "2024-03-15"

// ReleaseHash, the Git hash when version.go was generated
ReleaseHash = "601fb5b5"
ReleaseHash = "6139a5bf"

LicenseText = `
Redistribution and use in source and binary forms, with or without
Expand Down

0 comments on commit 4169bca

Please sign in to comment.