diff --git a/app/models/concerns/crosscitable.rb b/app/models/concerns/crosscitable.rb index 962db7090..0692eb792 100644 --- a/app/models/concerns/crosscitable.rb +++ b/app/models/concerns/crosscitable.rb @@ -83,6 +83,31 @@ def update_xml write_attribute(:xml, xml) end + def clean_xml(string) + begin + return nil unless string.present? + + # enforce utf-8 + string = string.force_encoding("UTF-8") + rescue ArgumentError, Encoding::CompatibilityError => error + # convert utf-16 to utf-8 + string = string.force_encoding('UTF-16').encode('UTF-8') + string.gsub!('encoding="UTF-16"', 'encoding="UTF-8"') + end + + # remove optional bom + string.gsub!("\xEF\xBB\xBF", '') + + # remove leading and trailing whitespace + string = string.strip + + return nil unless string.start_with?('') + end + + it "clean_xml utf-16" do + string = file_fixture('utf-16.xml').read + expect(subject.clean_xml(string)).to start_with('') + end + end + context "from_xml" do it "from_xml" do string = file_fixture('datacite.xml').read diff --git a/spec/fixtures/files/utf-16.xml b/spec/fixtures/files/utf-16.xml new file mode 100644 index 000000000..1541c3ff8 Binary files /dev/null and b/spec/fixtures/files/utf-16.xml differ diff --git a/spec/fixtures/files/utf-8_bom.xml b/spec/fixtures/files/utf-8_bom.xml new file mode 100644 index 000000000..4338047c4 --- /dev/null +++ b/spec/fixtures/files/utf-8_bom.xml @@ -0,0 +1,59 @@ + + +10.25664/art-0226 + + + Fournier, Jacques + Jacques + Fournier + + + Lechat, Christian + Christian + Lechat + + + Courtecuisse, Régis + Régis + Courtecuisse + + + + The genera Kretzschmariella and Nemania (Xylariaceae) in Guadeloupe and Martinique (French West Indies) + +Ascomycete.org +2018 + + 2018-02-25 + +en +Journal Article + + 2100-0840 + + + 10 + 1 + 1-47 + + + 10362 kB + + + PDF + + + Creative Commons BY-NC-ND 3.0 FR + + + This survey deals with the Nemania taxa collected in the French West Indies in the course of an ongoing inventorial work on the mycobiota of these islands initiated in 2003. Based on the evaluation and comparison of their morphological characters, sixteen taxa are described, illustrated and discussed, including seven known taxa, viz.: N. beaumontii, N. bipapillata, N. caries, N. chestersii var. microspora, N. diffusa, N. immersidiscus and N. subaenea. Nine new taxa are proposed, including N. albofarcta, N. colubrina, N. discostoma, N. flavoviridis, N. nivea, N. obscura, N. roseolilacina and N. sericata, and the new combination N. sublutea for a taxon formerly placed in Hypoxylon. The monotypic genus Kretzschmariella, represented by K. culmorum, is included for its morphological resemblance and possible confusion with Nemania. With the exception of K. culmorum and N. bipapillata which were already reported from the Caribbean, all other taxa are new to this region. A dichotomous identification key and a synoptic figure plate of stromata are presented + + + Xylariales + Xylariaceae + Kretzschmariella + Nemania + Martinique + Guadeloupe + +