Skip to content

Commit

Permalink
handle utf-8 bom controller
Browse files: browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Dec 27, 2018
1 parent ae96817 commit 285c8fd
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 5 deletions.
8 changes: 8 additions & 0 deletions app/controllers/dois_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,14 @@ def safe_params
# extract attributes from xml field and merge with attributes provided directly
xml = p[:xml].present? ? Base64.decode64(p[:xml]).force_encoding("UTF-8") : nil

if xml.present?
# remove optional utf-8 bom
xml.gsub!("\xEF\xBB\xBF", '')

# remove leading and trailing whitespace
xml = xml.strip
end

meta = xml.present? ? parse_xml(xml, doi: p[:doi]) : {}
xml = meta["string"]

Expand Down
12 changes: 9 additions & 3 deletions app/models/concerns/crosscitable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def clean_xml(string)

# enforce utf-8
string = string.force_encoding("UTF-8")
rescue ArgumentError, Encoding::CompatibilityError => error
rescue ArgumentError, Encoding::CompatibilityError => exception
# convert utf-16 to utf-8
string = string.force_encoding('UTF-16').encode('UTF-8')
string.gsub!('encoding="UTF-16"', 'encoding="UTF-8"')
Expand All @@ -105,7 +105,13 @@ def clean_xml(string)

# make sure xml is valid
doc = Nokogiri::XML(string) { |config| config.strict.noblanks }
doc.to_xml.strip
doc.to_xml
rescue ArgumentError, Encoding::CompatibilityError => exception
logger = Logger.new(STDOUT)
logger.error "Error " + exception.message + "."
logger.error exception

nil
end

def well_formed_xml(string)
Expand All @@ -120,7 +126,7 @@ def from_xml(string)
return nil unless string.start_with?('<?xml version=') || string.start_with?('<resource ')

doc = Nokogiri::XML(string) { |config| config.strict.noblanks }
doc.to_xml.strip
doc.to_xml
end

def from_json(string)
Expand Down
2 changes: 1 addition & 1 deletion spec/fixtures/files/datacite.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@
<descriptions>
<description descriptionType="Abstract">Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for...</description>
</descriptions>
</resource>
</resource>
2 changes: 1 addition & 1 deletion spec/requests/metadata_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
before { post "/dois/#{doi.doi}/metadata", params: valid_attributes.to_json, headers: headers }

it 'creates a metadata record' do
expect(Base64.decode64(json.dig('data', 'attributes', 'xml'))).to eq(xml)
expect(Base64.decode64(json.dig('data', 'attributes', 'xml'))).to eq(xml.rstrip)
expect(json.dig('data', 'attributes', 'namespace')).to eq("http://datacite.org/schema/kernel-4")
end

Expand Down

0 comments on commit 285c8fd

Please sign in to comment.