Skip to content

Commit

Permalink
Merge pull request #477 from thstieler/feature/force-UTF-8_encoding_f…
Browse files Browse the repository at this point in the history
…or_IPTC

Optionally force UTF-8 encoding for IPTC records
  • Loading branch information
garydgregory authored Jan 21, 2025
2 parents a967e0c + 5d46e94 commit 568e3f6
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -372,12 +372,24 @@ public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final Imagin
}

public byte[] writeIptcBlock(List<IptcRecord> elements) throws ImagingException, IOException {
Charset charset = DEFAULT_CHARSET;
for (final IptcRecord element : elements) {
final byte[] recordData = element.getValue().getBytes(charset);
if (!new String(recordData, charset).equals(element.getValue())) {
charset = StandardCharsets.UTF_8;
break;
return writeIptcBlock(elements, false);
}

public byte[] writeIptcBlock(List<IptcRecord> elements, boolean forceUtf8Encoding) throws ImagingException, IOException {
Charset charset;
if (forceUtf8Encoding) {
// Using UTF-8 is forced
charset = StandardCharsets.UTF_8;
} else {
// Check if all values can be converted to bytes with DEFAULT_CHARSET,
// otherwise use UTF-8
charset = DEFAULT_CHARSET;
for (final IptcRecord element : elements) {
final byte[] recordData = element.getValue().getBytes(charset);
if (!new String(recordData, charset).equals(element.getValue())) {
charset = StandardCharsets.UTF_8;
break;
}
}
}
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,14 @@ public void writeIptc(final ByteSource byteSource, final OutputStream os, Photos
{
// discard old iptc blocks.
final List<IptcBlock> newBlocks = newData.getNonIptcBlocks();
final byte[] newBlockBytes = new IptcParser().writeIptcBlock(newData.getRecords());
final byte[] newBlockBytes = new IptcParser().writeIptcBlock(newData.getRecords(), newData.isForceUtf8Encoding());

final int blockType = IptcConstants.IMAGE_RESOURCE_BLOCK_IPTC_DATA;
final byte[] blockNameBytes = ImagingConstants.EMPTY_BYTE_ARRAY;
final IptcBlock newBlock = new IptcBlock(blockType, blockNameBytes, newBlockBytes);
newBlocks.add(newBlock);

newData = new PhotoshopApp13Data(newData.getRecords(), newBlocks);
newData = new PhotoshopApp13Data(newData.getRecords(), newBlocks, newData.isForceUtf8Encoding());

final byte[] segmentBytes = new IptcParser().writePhotoshopApp13Segment(newData);
final JFIFPieceSegment newSegment = new JFIFPieceSegment(JpegConstants.JPEG_APP13_MARKER, segmentBytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,18 @@
import java.util.List;

public class PhotoshopApp13Data {
private final boolean forceUtf8Encoding;
private final List<IptcRecord> records;
private final List<IptcBlock> rawBlocks;

public PhotoshopApp13Data(final List<IptcRecord> records, final List<IptcBlock> rawBlocks) {
/**
 * Creates a new instance from the given records, raw blocks and UTF-8 flag.
 * <p>
 * A {@code null} list is replaced by an empty list; a non-null list is wrapped in an unmodifiable view (it is not copied).
 * </p>
 *
 * @param records           the IPTC records, may be {@code null}
 * @param rawBlocks         the raw blocks, may be {@code null}
 * @param forceUtf8Encoding the value later reported by {@link #isForceUtf8Encoding()}
 */
public PhotoshopApp13Data(final List<IptcRecord> records, final List<IptcBlock> rawBlocks, final boolean forceUtf8Encoding) {
    this.forceUtf8Encoding = forceUtf8Encoding;

    if (records != null) {
        this.records = Collections.unmodifiableList(records);
    } else {
        this.records = Collections.emptyList();
    }

    if (rawBlocks != null) {
        this.rawBlocks = Collections.unmodifiableList(rawBlocks);
    } else {
        this.rawBlocks = Collections.emptyList();
    }
}

/**
 * Creates a new instance without forcing UTF-8 encoding.
 * <p>
 * Delegates to the three-argument constructor, passing {@code false} as the {@code forceUtf8Encoding} flag.
 * </p>
 *
 * @param records   the IPTC records, may be {@code null}
 * @param rawBlocks the raw blocks, may be {@code null}
 */
public PhotoshopApp13Data(final List<IptcRecord> records, final List<IptcBlock> rawBlocks) {
this(records, rawBlocks, false);
}

public List<IptcBlock> getNonIptcBlocks() {
Expand All @@ -48,4 +54,8 @@ public List<IptcRecord> getRecords() {
return new ArrayList<>(records);
}

/**
 * Returns the {@code forceUtf8Encoding} flag that was supplied at construction time.
 *
 * @return {@code true} if UTF-8 encoding was requested, {@code false} otherwise
 */
public boolean isForceUtf8Encoding() {
return forceUtf8Encoding;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,26 @@
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.imaging.ImagingException;
import org.apache.commons.imaging.bytesource.ByteSource;
import org.apache.commons.imaging.common.BinaryOutputStream;
import org.apache.commons.imaging.common.GenericImageMetadata.GenericImageMetadataItem;
import org.apache.commons.imaging.formats.jpeg.JpegImageMetadata;
import org.apache.commons.imaging.formats.jpeg.JpegImageParser;
import org.apache.commons.imaging.formats.jpeg.JpegImagingParameters;
import org.apache.commons.imaging.formats.jpeg.JpegPhotoshopMetadata;
import org.apache.commons.imaging.test.TestResources;
import org.apache.commons.lang3.ArrayUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

/**
* Tests for the {@link IptcParser} class.
Expand All @@ -61,6 +67,74 @@ public void testEncodingSupport() throws IOException, ImagingException {
assertArrayEquals("\u8c22\u8c22".getBytes(StandardCharsets.UTF_8), thanksInMandarin.getText().getBytes(StandardCharsets.UTF_8));
}

/**
 * Verifies the bytes produced by {@link IptcParser#writeIptcBlock(List, boolean)} for a single caption record. <br />
 * UTF-8 output, preceded by a "Coded Character Set" envelope record, is expected when {@code forceUtf8} is {@code true} or when the value contains the euro
 * sign, which is not defined in ISO-8859-1. Otherwise ISO-8859-1 output without that envelope record is expected.
 *
 * @param value     the caption text to encode
 * @param forceUtf8 whether UTF-8 encoding is requested explicitly
 * @throws IOException if writing the IPTC block or assembling the expected bytes fails
 */
@ParameterizedTest
@CsvSource({ "äöü ÄÖÜß, true", "äöü ÄÖÜß €, true", "äöü ÄÖÜß, false", "äöü ÄÖÜß €, false" })
public void testEncoding(String value, boolean forceUtf8) throws IOException {
    final IptcParser iptcParser = new IptcParser();
    final List<IptcRecord> captionOnly = new ArrayList<>();
    captionOnly.add(new IptcRecord(IptcTypes.CAPTION_ABSTRACT, value));

    // Bytes under test.
    final byte[] written = iptcParser.writeIptcBlock(captionOnly, forceUtf8);

    // UTF-8 is expected either when it is forced or when the value contains € (which isn't a character defined in ISO-8859-1);
    // otherwise the default charset ISO-8859-1 is expected.
    final boolean expectUtf8 = forceUtf8 || value.contains("€");
    final Charset expectedCharset = expectUtf8 ? StandardCharsets.UTF_8 : StandardCharsets.ISO_8859_1;

    // Expected leading bytes: the optional "Coded Character Set" envelope record followed by the record version record.
    final byte[] expectedHeader;
    try (ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream();
            BinaryOutputStream headerOut = BinaryOutputStream.create(headerBuffer, iptcParser.getByteOrder())) {
        if (expectUtf8) {
            final byte[] codedCharset = { 0x1B, '%', 'G' };
            headerOut.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
            headerOut.write(IptcConstants.IPTC_ENVELOPE_RECORD_NUMBER);
            headerOut.write(90); // Constant for "Coded Character Set" record
            headerOut.write2Bytes(codedCharset.length);
            headerOut.write(codedCharset);
        }

        // Record version record: marker, record number, type, size (2) and value (2).
        headerOut.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
        headerOut.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
        headerOut.write(IptcTypes.RECORD_VERSION.type);
        headerOut.write2Bytes(2);
        headerOut.write2Bytes(2);
        expectedHeader = headerBuffer.toByteArray();
    }

    // Expected application record carrying the caption, encoded with the expected charset.
    final byte[] expectedCaptionRecord;
    try (ByteArrayOutputStream captionBuffer = new ByteArrayOutputStream();
            BinaryOutputStream captionOut = BinaryOutputStream.create(captionBuffer, iptcParser.getByteOrder())) {
        final byte[] encodedValue = value.getBytes(expectedCharset);
        captionOut.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
        captionOut.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
        captionOut.write(IptcTypes.CAPTION_ABSTRACT.type);
        captionOut.write2Bytes(encodedValue.length);
        captionOut.write(encodedValue);
        expectedCaptionRecord = captionBuffer.toByteArray();
    }

    // Compare the written block slice by slice: header first, then the caption record that follows it.
    final int headerEnd = expectedHeader.length;
    final int captionEnd = headerEnd + expectedCaptionRecord.length;
    assertArrayEquals(expectedHeader, ArrayUtils.subarray(written, 0, headerEnd));
    assertArrayEquals(expectedCaptionRecord, ArrayUtils.subarray(written, headerEnd, captionEnd));
}

/**
* Some block types (or Image Resource Blocks in Photoshop specification) have a recommendation to not be interpreted by parsers, as they are handled by
* Photoshop in a special way, that varies by platform (e.g. Mac, Windows, etc).
Expand Down

0 comments on commit 568e3f6

Please sign in to comment.