diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index 5e36eac70..9d2501482 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -519,6 +519,14 @@ def safe_params # extract attributes from xml field and merge with attributes provided directly xml = p[:xml].present? ? Base64.decode64(p[:xml]).force_encoding("UTF-8") : nil + if xml.present? + # remove optional utf-8 bom + xml.gsub!("\xEF\xBB\xBF", '') + + # remove leading and trailing whitespace + xml = xml.strip + end + meta = xml.present? ? parse_xml(xml, doi: p[:doi]) : {} xml = meta["string"] diff --git a/app/models/concerns/crosscitable.rb b/app/models/concerns/crosscitable.rb index 0692eb792..6157a35ed 100644 --- a/app/models/concerns/crosscitable.rb +++ b/app/models/concerns/crosscitable.rb @@ -89,7 +89,7 @@ def clean_xml(string) # enforce utf-8 string = string.force_encoding("UTF-8") - rescue ArgumentError, Encoding::CompatibilityError => error + rescue ArgumentError, Encoding::CompatibilityError => exception # convert utf-16 to utf-8 string = string.force_encoding('UTF-16').encode('UTF-8') string.gsub!('encoding="UTF-16"', 'encoding="UTF-8"') @@ -105,7 +105,13 @@ def clean_xml(string) # make sure xml is valid doc = Nokogiri::XML(string) { |config| config.strict.noblanks } - doc.to_xml.strip + doc.to_xml + rescue ArgumentError, Encoding::CompatibilityError => exception + logger = Logger.new(STDOUT) + logger.error "Error " + exception.message + "." + logger.error exception + + nil end def well_formed_xml(string) @@ -120,7 +126,7 @@ def from_xml(string) return nil unless string.start_with?(' Eating your own dog food is a slang term to describe that an organization should itself use the products and services it provides. For DataCite this means that we should use DOIs with appropriate metadata and strategies for long-term preservation for... - \ No newline at end of file + diff --git a/spec/requests/metadata_spec.rb b/spec/requests/metadata_spec.rb index 6311f22f7..f2931675a 100644 --- a/spec/requests/metadata_spec.rb +++ b/spec/requests/metadata_spec.rb @@ -65,7 +65,7 @@ before { post "/dois/#{doi.doi}/metadata", params: valid_attributes.to_json, headers: headers } it 'creates a metadata record' do - expect(Base64.decode64(json.dig('data', 'attributes', 'xml'))).to eq(xml) + expect(Base64.decode64(json.dig('data', 'attributes', 'xml'))).to eq(xml.rstrip) expect(json.dig('data', 'attributes', 'namespace')).to eq("http://datacite.org/schema/kernel-4") end