Skip to content

Commit

Permalink
handle namespaced xml in metadata import. #165
Browse files — browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Dec 27, 2018
1 parent 4e9abf8 commit df42577
Show file tree
Hide file tree
Showing 3 changed files with 318 additions and 1 deletion.
3 changes: 2 additions & 1 deletion app/models/concerns/crosscitable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ def clean_xml(string)
# remove leading and trailing whitespace
string = string.strip

return nil unless string.start_with?('<?xml version=') || string.start_with?('<resource ')
# accept input that lacks the <?xml version="1.0" ?> declaration, or whose root <resource> element carries a namespace prefix
return nil unless string.start_with?('<?xml version=') || string.start_with?('<resource ') || /\A<.+:resource/.match(string)

# make sure xml is valid
doc = Nokogiri::XML(string) { |config| config.strict.noblanks }
Expand Down
5 changes: 5 additions & 0 deletions spec/concerns/crosscitable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
expect { subject.clean_xml(string) }.to raise_error(Nokogiri::XML::SyntaxError, "39:18: FATAL: Premature end of data in tag resource line 2")
end

# A fixture whose root element is namespace-prefixed should still be accepted
# by clean_xml and come back with an XML declaration at the front.
it "clean_xml namespace" do
  xml = file_fixture('datacite_namespace.xml').read
  cleaned = subject.clean_xml(xml)

  expect(cleaned).to start_with('<?xml version="1.0"')
end

it "clean_xml utf-8 bom" do
string = file_fixture('utf-8_bom.xml').read
expect(subject.clean_xml(string)).to start_with('<?xml version="1.0" encoding="UTF-8"?>')
Expand Down
Loading

0 comments on commit df42577

Please sign in to comment.