Skip to content

Commit

Permalink
Extend support for secondary BioSample accessions
Browse files Browse the repository at this point in the history
  • Loading branch information
lmrodriguezr committed Nov 13, 2024
1 parent 32ff6b2 commit 24fa7b1
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 31 deletions.
28 changes: 19 additions & 9 deletions app/models/genome.rb
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,10 @@ def source_attributes
@source_attributes
end

##
# Returns registered BioSample accessions, directly from the database
# if the source database is +:biosample+, or through the external links
# if it is +:sra+
def biosample_accessions
case source_database.try(:to_sym)
when :sra
Expand All @@ -267,6 +271,15 @@ def biosample_accessions
end
end

##
# Returns all BioSample accessions, including secondary (alternative)
# accessions, as a flat Array without duplicates.
#
# The accessions are read from the +:biosample_accessions+ entry of each
# sample stored under +:samples+ in the source hash. Returns an empty
# Array if the source hash or its +:samples+ entry are missing. Samples
# without data (+nil+ entries, e.g., when no metadata could be retrieved
# for an accession) and samples without registered accessions are skipped
def biosample_accessions_all
samples = source_hash.try(:dig, :samples) || {}

# +compact+ drops +nil+ sample entries, which would otherwise raise
# NoMethodError when indexed
samples.values.compact.flat_map do |sample|
sample[:biosample_accessions] || []
end.uniq
end

def sra_accessions
case source_database.to_sym
when :sra
Expand All @@ -277,11 +290,6 @@ def sra_accessions
end
end

#def sequencing_experiments
# @sequencing_experiments ||=
# SequencingExperiment.by_biosample(biosample_accessions)
#end

def link(acc = nil)
acc ||= accession
case database
Expand Down Expand Up @@ -410,11 +418,13 @@ def link_sequencing_experiments!

# Link experiments that should be here
self.sequencing_experiments +=
SequencingExperiment.where(biosample_accession: biosample_accessions)
.where.not(id: sequencing_experiments.pluck(:id))
SequencingExperiment
.where(biosample_accession: biosample_accessions_all)
.where.not(id: sequencing_experiments.pluck(:id))
self.sequencing_experiments +=
SequencingExperiment.where(biosample_accession_2: biosample_accessions)
.where.not(id: sequencing_experiments.pluck(:id))
SequencingExperiment
.where(biosample_accession_2: biosample_accessions_all)
.where.not(id: sequencing_experiments.pluck(:id))
end
end

Expand Down
38 changes: 25 additions & 13 deletions app/models/genome/external_resources.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ def reload_source_json!
when :biosample
source_accessions.each do |acc|
data[acc] = external_biosample_hash(acc)
external_biosample_to_sra(acc)
data[acc][:biosample_accessions].each do |acc_alt|
external_biosample_to_sra(acc_alt)
end
end
end

self.queued_external = nil
self.source_json = { retrieved_at: DateTime.now, samples: data }.to_json
self.queued_external = nil
save
end

Expand All @@ -56,7 +58,7 @@ def external_biosample_to_sra(acc)

ng = Nokogiri::XML(body)
ng.xpath('//EXPERIMENT_SET/EXPERIMENT').map do |exp|
sra_acc = exp['accession'] || exp.xpath('IDENTIFIERS/PRIMARY_ID').text
sra_acc = exp['accession'] || exp.xpath('//IDENTIFIERS/PRIMARY_ID').text
SequencingExperiment.find_or_create_by(sra_accession: sra_acc) do |se|
se.external_reuse_metadata_xml = true
se.queued_external = nil
Expand All @@ -80,17 +82,24 @@ def external_biosample_hash(acc)
def external_biosample_hash_ebi(acc)
uri = "https://www.ebi.ac.uk/ena/browser/api/xml/#{acc}?includeLinks=false"
body = external_request(uri)
return unless body && body != '{}'
return unless body.present?

ng = Nokogiri::XML(body)
sample = ng.xpath('//SAMPLE_SET/SAMPLE').first or return
{}.tap do |hash|
h = { api: 'EBI' }
h[:title] = ng.xpath('//SAMPLE_SET/SAMPLE/TITLE').text
h[:description] = ng.xpath('//SAMPLE_SET/SAMPLE/DESCRIPTION').text
h[:title] = sample.xpath('//TITLE').text
h[:description] = sample.xpath('//DESCRIPTION').text
h[:attributes] = Hash[
ng.xpath('//SAMPLE_SET/SAMPLE/SAMPLE_ATTRIBUTES/SAMPLE_ATTRIBUTE')
sample.xpath('//SAMPLE_ATTRIBUTES/SAMPLE_ATTRIBUTE')
.map { |attr| [attr.xpath('TAG').text, attr.xpath('VALUE').text] }
]
h[:biosample_accessions] = [
acc, sample['accession'],
sample.xpath('//IDENTIFIERS/PRIMARY_ID').text,
sample.xpath('//IDENTIFIERS/SECONDARY_ID').text,
sample.xpath('//EXTERNAL_ID[@namespace="BioSample"]').text
].select(&:present?).uniq
h.each { |k, v| hash[k] = h[k] if h[k].present? }
end
end
Expand All @@ -101,22 +110,25 @@ def external_biosample_hash_ncbi(acc)
uri = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' \
"db=biosample&id=#{acc}&rettype=xml&retmode=text"
body = external_request(uri)
return unless body && body != '{}'
return unless body.present?

ng = Nokogiri::XML(body)
sample = ng.xpath('//BioSampleSet/BioSample').first or return
{}.tap do |hash|
h = { api: 'NCBI' }
h[:title] = ng.xpath('//BioSampleSet/BioSample/Description/Title').text
h[:description] =
ng.xpath('//BioSampleSet/BioSample/Description/Comment/Paragraph').text
h[:title] = sample.xpath('//Description/Title').text
h[:description] = sample.xpath('//Description/Comment/Paragraph').text
h[:attributes] = Hash[
ng.xpath('//BioSampleSet/BioSample/Attributes/Attribute')
sample.xpath('//Attributes/Attribute')
.map do |attr|
[attr['harmonized_name'] || attr['attribute_name'], attr.text]
end
]
package = ng.xpath('//BioSampleSet/BioSample/Package').text
package = sample.xpath('//Package').text
h[:attributes][:ncbi_package] = package if package.present?
h[:biosample_accessions] = [
acc, sample['accession'], sample.xpath('//Ids/Id[@db="BioSample"]').text
].compact.uniq
h.each { |k, v| hash[k] = h[k] if h[k].present? }
end
end
Expand Down
12 changes: 7 additions & 5 deletions app/models/sequencing_experiment/external_resources.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,17 @@ def external_sra_to_biosample!
'//EXPERIMENT_SET/EXPERIMENT/DESIGN/SAMPLE_DESCRIPTOR/IDENTIFIERS'
)
biosample_id =
sample_id.xpath('EXTERNAL_ID[@namespace="BioSample"]').map(&:text)
sample_id.xpath('//EXTERNAL_ID[@namespace="BioSample"]')
.first.try(:text)
if biosample_id.present?
self.biosample_accession = biosample_id.first
self.biosample_accession = biosample_id
self.biosample_accession_2 =
sample_id.xpath('PRIMARY_ID').first.try(:text)
sample_id.xpath('//PRIMARY_ID').first.try(:text)
else
self.biosample_accession =
sample_id.xpath('PRIMARY_ID').first.try(:text)
self.biosample_accession_2 = nil
sample_id.xpath('//PRIMARY_ID').first.try(:text)
self.biosample_accession_2 =
sample_id.xpath('//SECONDARY_ID').first.try(:text)
end
else
# Unknown XML specification
Expand Down
8 changes: 5 additions & 3 deletions app/views/genomes/_samples.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,12 @@
<% end %>
</div>
<% end %>
<p class="lead mx-2">
<%= sample[:description] %>
</p>
<p class="lead mx-2"><%= sample[:description] %></p>
<dl class="main-section">
<% if sample[:biosample_accessions].present? %>
<dt>BioSample Accession(s)</dt>
<dd><%= sample[:biosample_accessions].join(', ') %></dd>
<% end %>
<% sample[:attributes].each do |k, v| %>
<dt><%= k %></dt>
<dd><%= v %></dd>
Expand Down
3 changes: 2 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2024_11_11_163435) do
ActiveRecord::Schema.define(version: 2024_11_12_170318) do

create_table "action_text_rich_texts", force: :cascade do |t|
t.string "name", null: false
Expand Down Expand Up @@ -388,6 +388,7 @@
t.datetime "created_at", precision: 6, null: false
t.datetime "updated_at", precision: 6, null: false
t.datetime "retrieved_at"
t.string "biosample_accession_2"
t.index ["sra_accession"], name: "index_sequencing_experiments_on_sra_accession", unique: true
end

Expand Down

0 comments on commit 24fa7b1

Please sign in to comment.