Skip to content

Commit

Permalink
import researchers linked to dois. #298
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Jun 29, 2019
1 parent 8e35c77 commit 5acdbe4
Show file tree
Hide file tree
Showing 18 changed files with 623 additions and 51 deletions.
136 changes: 136 additions & 0 deletions app/controllers/researchers_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# JSON:API endpoints for researchers. Individual records are looked up by
# matching the :id route parameter against Researcher#uid.
class ResearchersController < ApplicationController
  include ActionController::MimeResponds

  prepend_before_action :authenticate_user!
  before_action :set_researcher, only: [:show, :destroy]
  load_and_authorize_resource except: [:index, :update]

  # Lists researchers, either by explicit id(s) or by a search query.
  #
  # Recognized params: :id, :ids, :query, :sort, plus whatever
  # page_from_params and fields_from_params read. Unknown or missing :sort
  # values fall back to family name ascending.
  def index
    sort = case params[:sort]
           when "relevance" then { "_score" => { order: 'desc' }}
           when "name" then { "family_name.raw" => { order: 'asc' }}
           when "-name" then { "family_name.raw" => { order: 'desc' }}
           when "created" then { created_at: { order: 'asc' }}
           when "-created" then { created_at: { order: 'desc' }}
           else { "family_name.raw" => { order: 'asc' }}
           end

    page = page_from_params(params)

    response = if params[:id].present?
                 Researcher.find_by_id(params[:id])
               elsif params[:ids].present?
                 Researcher.find_by_id(params[:ids], page: page, sort: sort)
               else
                 Researcher.query(params[:query], page: page, sort: sort)
               end

    begin
      total = response.results.total
      # guard against division by zero when a page size of 0 is requested
      total_pages = page[:size] > 0 ? (total.to_f / page[:size]).ceil : 0

      options = {}
      options[:meta] = {
        total: total,
        "totalPages" => total_pages,
        page: page[:number]
      }.compact

      # The next link repeats the current filters (including :ids, so that
      # paging through an ids lookup stays restricted to those ids).
      options[:links] = {
        self: request.original_url,
        next: response.results.blank? ? nil : request.base_url + "/researchers?" + {
          query: params[:query],
          ids: params[:ids],
          "page[number]" => page[:number] + 1,
          "page[size]" => page[:size],
          sort: params[:sort] }.compact.to_query
      }.compact
      options[:is_collection] = true

      # only pass :fields to the serializer when the request asked for them
      fields = fields_from_params(params)
      options = options.merge(fields: fields) if fields

      render json: ResearcherSerializer.new(response.results, options).serialized_json, status: :ok
    rescue Elasticsearch::Transport::Transport::Errors::BadRequest => exception
      Raven.capture_exception(exception)

      # The first six characters of the message are skipped before parsing the
      # remainder as JSON (presumably a status prefix such as "[400] " -- confirm).
      message = JSON.parse(exception.message[6..-1]).to_h.dig("error", "root_cause", 0, "reason")

      render json: { "errors" => { "title" => message }}.to_json, status: :bad_request
    end
  end

  # Renders the researcher loaded by set_researcher.
  def show
    options = {}
    options[:is_collection] = false
    render json: ResearcherSerializer.new(@researcher, options).serialized_json, status: :ok
  end

  # Creates a researcher from a JSON:API payload; responds with 201 on success
  # and 422 with the validation errors otherwise.
  def create
    logger = Logger.new(STDOUT)
    @researcher = Researcher.new(safe_params)
    authorize! :create, @researcher

    if @researcher.save
      options = {}
      options[:is_collection] = false
      render json: ResearcherSerializer.new(@researcher, options).serialized_json, status: :created
    else
      logger.warn @researcher.errors.inspect
      render json: serialize_errors(@researcher.errors), status: :unprocessable_entity
    end
  end

  # Upsert: updates the researcher with the given uid, or creates it when no
  # such record exists. Responds with 200 for an update and 201 for a create.
  def update
    logger = Logger.new(STDOUT)
    @researcher = Researcher.where(uid: params[:id]).first
    exists = @researcher.present?

    # create researcher if it doesn't exist already
    @researcher = Researcher.new(safe_params.except(:format).merge(uid: params[:id])) unless exists

    logger.info @researcher.inspect
    authorize! :update, @researcher

    # `update` replaces the deprecated `update_attributes` alias
    if @researcher.update(safe_params)
      options = {}
      options[:is_collection] = false
      render json: ResearcherSerializer.new(@researcher, options).serialized_json, status: exists ? :ok : :created
    else
      logger.warn @researcher.errors.inspect
      render json: serialize_errors(@researcher.errors), status: :unprocessable_entity
    end
  end

  # Deletes the researcher loaded by set_researcher; responds with 204 on
  # success and 422 with the record's errors otherwise.
  def destroy
    logger = Logger.new(STDOUT)
    if @researcher.destroy
      head :no_content
    else
      logger.warn @researcher.errors.inspect
      render json: serialize_errors(@researcher.errors), status: :unprocessable_entity
    end
  end

  protected

  # Loads the researcher whose uid matches params[:id].
  #
  # Raises ActiveRecord::RecordNotFound when there is no such researcher.
  def set_researcher
    @researcher = Researcher.where(uid: params[:id]).first
    raise ActiveRecord::RecordNotFound unless @researcher.present?
  end

  private

  # Extracts the permitted attributes from a JSON:API payload, mapping the
  # camelCase keys (and the resource id) to the model's column names.
  #
  # Raises JSON::ParserError when the request has no "data" member.
  def safe_params
    raise JSON::ParserError, "You need to provide a payload following the JSONAPI spec" unless params[:data].present?

    ActiveModelSerializers::Deserialization.jsonapi_parse!(
      params,
      only: [
        :uid, :name, "givenNames", "familyName"
      ],
      keys: {
        id: :uid, "givenNames" => :given_names, "familyName" => :family_name
      }
    )
  end
end
73 changes: 73 additions & 0 deletions app/jobs/orcid_auto_update_by_id_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Background job that takes a single ORCID URL, fetches the person's public
# name metadata from the ORCID API and stores it as a researcher via the
# researchers endpoint, unless a researcher with that ORCID already exists.
class OrcidAutoUpdateByIdJob < ActiveJob::Base
  queue_as :lupo_background

  # retry_on ActiveRecord::Deadlocked, wait: 10.seconds, attempts: 3
  # retry_on Faraday::TimeoutError, wait: 10.minutes, attempts: 3

  # discard_on ActiveJob::DeserializationError

  # @param id [String] ORCID URL of the form http(s)://orcid.org/<orcid>
  # @return [Hash, Object] an empty hash when the job stops early (no ORCID in
  #   the URL, researcher already registered, ORCID API did not answer 200);
  #   otherwise the result of the final logger call
  def perform(id)
    logger = Logger.new(STDOUT)

    orcid = orcid_from_url(id)
    return {} unless orcid.present?

    # check whether ORCID ID has been registered with DataCite already
    result = Researcher.find_by_id(orcid).results.first
    return {} unless result.blank?

    # otherwise fetch basic ORCID metadata and store with DataCite
    url = "https://pub.orcid.org/v2.1/#{orcid}/person"
    # if Rails.env.production?
    #   url = "https://pub.orcid.org/v2.1/#{orcid}/person"
    # else
    #   url = "https://pub.sandbox.orcid.org/v2.1/#{orcid}/person"
    # end

    response = Maremma.get(url, accept: "application/vnd.orcid+json")
    return {} if response.status != 200

    message = response.body.fetch("data", {})
    attributes = parse_message(message: message)
    data = {
      "data" => {
        "type" => "researchers",
        "attributes" => attributes
      }
    }

    # PUT against the researchers endpoint, which creates the record when it
    # does not exist yet
    url = "http://localhost/researchers/#{orcid}"
    response = Maremma.put(url, accept: 'application/vnd.api+json',
                                content_type: 'application/vnd.api+json',
                                data: data.to_json,
                                username: ENV["ADMIN_USERNAME"],
                                password: ENV["ADMIN_PASSWORD"])

    if [200, 201].include?(response.status)
      logger.info "ORCID #{orcid} added."
    else
      logger.warn "[Error for ORCID #{orcid}]: " + response.body["errors"].inspect
    end
  end

  # Builds the researcher attributes from an ORCID "person" message.
  #
  # The display name is the credit name when present, otherwise the given
  # names and family name joined by a single space (missing parts are skipped
  # so that no stray leading/trailing space is produced).
  #
  # @param message [Hash, nil] parsed ORCID person record
  # @return [Hash] "name", "givenNames" and "familyName", without nil values
  def parse_message(message: nil)
    message ||= {}

    given_names = message.dig("name", "given-names", "value")
    family_name = message.dig("name", "family-name", "value")
    credit_name = message.dig("name", "credit-name", "value")

    name = if credit_name.present?
             credit_name
           elsif given_names.present? || family_name.present?
             [given_names, family_name].compact.join(" ")
           end

    {
      "name" => name,
      "givenNames" => given_names,
      "familyName" => family_name }.compact
  end

  # Returns everything after "http(s)://orcid.org/" in the given URL, or nil
  # when the URL does not have that form.
  def orcid_from_url(url)
    Array(/\A(http|https):\/\/orcid\.org\/(.+)/.match(url)).last
  end
end
7 changes: 7 additions & 0 deletions app/jobs/orcid_auto_update_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Fans a batch of ids out into one OrcidAutoUpdateByIdJob per id.
class OrcidAutoUpdateJob < ActiveJob::Base
  queue_as :lupo_background

  # @param ids [Enumerable] ids, each handed unchanged to its own
  #   OrcidAutoUpdateByIdJob
  def perform(ids)
    ids.each do |single_id|
      OrcidAutoUpdateByIdJob.perform_later(single_id)
    end
  end
end
4 changes: 4 additions & 0 deletions app/models/concerns/indexable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def query(query, options={})
from = 0
search_after = [options.dig(:page, :cursor)]
sort = [{ created: { order: 'asc' }}]
elsif self.name == "Researcher" && options.dig(:page, :cursor).present?
from = 0
search_after = [options.dig(:page, :cursor)]
sort = [{ created_at: { order: 'asc' }}]
elsif options.dig(:page, :cursor).present?
from = 0
search_after = [options.dig(:page, :cursor)]
Expand Down
26 changes: 26 additions & 0 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,32 @@ def self.update_datacite_crossref(options={})
response.results.total
end

# Pages through all events with source "datacite-orcid-auto-update" and
# enqueues one OrcidAutoUpdateJob per page with the unique obj_ids found.
#
# options[:size] is the page size (default 1000), options[:cursor] the sort
# value to start after (default 0). Returns the total reported by the last
# query that was run.
def self.update_datacite_orcid_auto_update(options={})
  log = Logger.new(STDOUT)

  batch_size = (options[:size] || 1000).to_i
  cursor = (options[:cursor] || 0).to_i

  # probe with a single-result query to learn how many events there are
  response = Event.query(nil, source_id: "datacite-orcid-auto-update", page: { size: 1, cursor: cursor })
  log.info "[Update] #{response.results.total} events for source datacite-orcid-auto-update."
  return response.results.total unless response.results.total > 0

  # walk through results using cursor
  until response.results.results.empty?
    response = Event.query(nil, source_id: "datacite-orcid-auto-update", page: { size: batch_size, cursor: cursor })
    hits = response.results.results
    break if hits.empty?

    log.info "[Update] Updating #{hits.length} datacite-orcid-auto-update events starting with _id #{cursor + 1}."
    # continue after the sort value of the last hit on this page
    cursor = response.results.to_a.last[:sort].first.to_i

    OrcidAutoUpdateJob.perform_later(hits.map(&:obj_id).uniq)
  end

  response.results.total
end

# Overridden: the record's parameter value is its uuid instead of its id.
def to_param
  uuid
end
Expand Down
83 changes: 64 additions & 19 deletions app/models/researcher.rb
Original file line number Diff line number Diff line change
@@ -1,30 +1,75 @@
class Researcher
def self.find_by_id(id)
orcid = orcid_from_url(id)
return {} unless orcid.present?
class Researcher < ActiveRecord::Base
# include helper module for Elasticsearch
include Indexable

url = "https://pub.orcid.org/v2.1/#{orcid}/person"
response = Maremma.get(url, accept: "application/vnd.orcid+json")
include Elasticsearch::Model

return {} if response.status != 200
validates_presence_of :uid
validates_uniqueness_of :uid
validates_format_of :email, with: /\A([^@\s]+)@((?:[-a-z0-9]+\.)+[a-z]{2,})\Z/i, if: :email?

# use different index for testing
index_name Rails.env.test? ? "researchers-test" : "researchers"

message = response.body.fetch("data", {})
data = [parse_message(id: id, message: message)]
settings index: {
analysis: {
analyzer: {
string_lowercase: { tokenizer: 'keyword', filter: %w(lowercase ascii_folding) }
},
filter: { ascii_folding: { type: 'asciifolding', preserve_original: true } }
}
} do
mapping dynamic: 'false' do
indexes :id, type: :keyword
indexes :uid, type: :keyword
indexes :name, type: :text, fields: { keyword: { type: "keyword" }, raw: { type: "text", "analyzer": "string_lowercase", "fielddata": true }}
indexes :given_names, type: :text, fields: { keyword: { type: "keyword" }, raw: { type: "text", "analyzer": "string_lowercase", "fielddata": true }}
indexes :family_name, type: :text, fields: { keyword: { type: "keyword" }, raw: { type: "text", "analyzer": "string_lowercase", "fielddata": true }}
indexes :created_at, type: :date
indexes :updated_at, type: :date
end
end

errors = response.body.fetch("errors", nil)
# Document that is sent to the search index for this researcher.
# The uid is indexed twice, as "id" and as "uid": the extra "id" is a
# workaround for finding the correct key in associations.
def as_indexed_json(options={})
  identifier = uid
  names = { "name" => name, "given_names" => given_names, "family_name" => family_name }
  timestamps = { "created_at" => created_at, "updated_at" => updated_at }

  { "id" => identifier, "uid" => identifier }.merge(names).merge(timestamps)
end

{ data: data, errors: errors }
# Fields searched by a free-text query, each with its boost after the caret
# (uid is weighted highest, then the name fields; "_all" carries no boost).
def self.query_fields
  %w(uid^10 name^5 given_names^5 family_name^5 _all)
end

def self.parse_message(id: nil, message: nil)
{
id: id,
name: message.dig("name", "credit-name", "value"),
"givenName" => message.dig("name", "given-names", "value"),
"familyName" => message.dig("name", "family-name", "value") }.compact
# Aggregations requested alongside a researcher search: none, so this is an
# empty hash.
def self.query_aggregations
  Hash.new
end

def self.orcid_from_url(url)
Array(/\A(http|https):\/\/orcid\.org\/(.+)/.match(url)).last
# Return results for one or more ids.
#
# @param ids [String, Array<String>] a single uid, a comma-separated string
#   of uids, or an array of uids
# @param options [Hash] optional :page (with :number and :size, defaulting
#   to 1 and 1000) and :sort (defaulting to created_at ascending)
# @return the response of the Elasticsearch search
def self.find_by_id(ids, options={})
  ids = ids.split(",") if ids.is_a?(String)

  # Resolve defaults into locals rather than writing them back into
  # options, so the caller's hash (and its nested :page hash) is not mutated.
  page = options[:page] || {}
  number = page[:number] || 1
  size = page[:size] || 1000
  sort = options[:sort] || { created_at: { order: "asc" }}

  __elasticsearch__.search({
    from: (number - 1) * size,
    size: size,
    sort: [sort],
    query: {
      terms: {
        # a terms query needs an array, also for nil or a single value
        uid: Array(ids)
      }
    },
    aggregations: query_aggregations
  })
end
end
8 changes: 8 additions & 0 deletions app/serializers/researcher_serializer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class ResearcherSerializer
include FastJsonapi::ObjectSerializer
set_key_transform :camel_lower
set_type :researchers
set_id :uid

attributes :name, :given_names, :family_name, :created_at, :updated_at
end
2 changes: 1 addition & 1 deletion config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
resources :prefixes, constraints: { :id => /.+/ }
end
resources :providers, constraints: { :id => /.+/ }

resources :researchers
resources :resource_types, path: 'resource-types', only: [:show, :index]

# custom routes for maintenance tasks
Expand Down
Loading

0 comments on commit 5acdbe4

Please sign in to comment.