Skip to content

Commit

Permalink
Field formatting improvements and JUnit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
plutext committed Aug 11, 2013
1 parent f68b8fa commit 6e72ff3
Show file tree
Hide file tree
Showing 13 changed files with 2,253 additions and 211 deletions.
170 changes: 129 additions & 41 deletions src/main/java/org/docx4j/model/fields/FormattingSwitchHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -198,21 +198,40 @@ private static class FieldResultIsNotADateOrTimeException extends Exception {
public FieldResultIsNotADateOrTimeException(){
super();
}
public FieldResultIsNotADateOrTimeException(String string) {
super(string);
}
}

private static class FieldResultIsNotANumberException extends Exception {

public FieldResultIsNotANumberException(){
super();
}

public FieldResultIsNotANumberException(String string) {
super(string);
}
}

public static String applyFormattingSwitch(FldSimpleModel model, String value) throws Docx4JException {

// date-and-time-formatting-switch: \@
Date date = null;
try {

date = getDate(model, value );

// For \@ date formatting, Word seems to give same results for
// DATE, DOCPROPERTY, and MERGEFIELD,
// but to have a different code path for =

// OK, its a date
if ( model.fldName.equals("=")) {
// For =, today's date is used!
date = new Date();
}

String dtFormat = findFirstSwitchValue("\\@", model.getFldParameters(), true);
log.debug("Applying date format " + dtFormat + " to " + value);

Expand All @@ -223,6 +242,9 @@ public static String applyFormattingSwitch(FldSimpleModel model, String value) t

// TODO .. analyse what Word does
// for now, just leave as is
} else if (dtFormat.equals("")) {
// Verified with Word 2010 sp1 for DOCPROPERTY (very likely others are the same)
return "Error! Switch argument not specified.";
} else {
value = formatDate(model, dtFormat, date);
}
Expand All @@ -233,42 +255,54 @@ public static String applyFormattingSwitch(FldSimpleModel model, String value) t
}

// numeric-formatting-switch: \#
double number;
double number=-1;
if (date==null) {
// It is not a date, so see whether it is a number

try {
number = getNumber(model, value);
String nFormat = findFirstSwitchValue("\\#", model.getFldParameters(), true);
log.debug("Applying number format " + nFormat + " to " + number);
String nFormat = findFirstSwitchValue("\\#", model.getFldParameters(), true);
if (nFormat!=null) {

if (nFormat==null) {
// SPECIFICATION: If no numeric-formatting-switch is present,
// a numeric result is formatted without leading spaces or
// trailing fractional zeros.
// If the result is negative, a leading minus sign is present.
// If the result is a whole number, no radix point is present.
if (nFormat.equals("") ) {
return "Error! Switch argument not specified.";
// Verified with Word 2010 sp1, for =, DOCPROPERTY, MERGEFIELD
// TODO unless it looks like a bookmark, eg {=AA \#} ?

// We'll only honour this if the number is really a number.
// Not, for example, CL.87559-p
try {
number = Double.parseDouble(value);
value = formatNumber(model, "#.##########", number );
} catch (Exception e) {
log.debug(value + " is not a number");
} else {

try {
number = getNumber(model, value);
} catch (FieldResultIsNotANumberException e) {
// Is value a bookmark?
// If so TODO set value to bookmark contents
//Otherwise
log.debug(e.getMessage());
// Word 2010 produces something like: "!Syntax Error"
return "!Syntax Error";
}


} else {
value = formatNumber(model, nFormat, number );


// SPECIFICATION: If no numeric-formatting-switch is present,
// a numeric result is formatted without leading spaces or
// trailing fractional zeros.
// If the result is negative, a leading minus sign is present.
// If the result is a whole number, no radix point is present.

// We'll only honour this if the number is really a number.
// Not, for example, CL.87559-p
// try {
// number = Double.parseDouble(value);
// value = formatNumber(model, "#.##########", number );
// } catch (Exception e) {
// log.debug(value + " is not a number");
// }

// That's commented out, because in reality (Word 2010 sp1),
// if there is no '\#', it is not treated as a number

}

} catch (FieldResultIsNotANumberException e) {
// SPECIFICATION: If the result of a field is not a number,
// the numeric-formatting-switch has no effect

}

}

// general-formatting-switch: \*
Expand Down Expand Up @@ -332,9 +366,6 @@ private static Date getDate(FldSimpleModel model, String dateStr) throws FieldRe

private static double getNumber( FldSimpleModel model, String value) throws FieldResultIsNotANumberException {

// Hmm, Word seems to be happy to extract a number from a DOCPROPERTY string,
// but not from = ... so we should only use NumberExtractor for certain field types?

WordprocessingMLPackage pkg = model.getWordMLPackage();
String decimalSymbol=null;
if (pkg!=null
Expand All @@ -346,6 +377,10 @@ private static double getNumber( FldSimpleModel model, String value) throws Fiel
}
}

// For DOCPROPERTY field, and possibly some others, but not "=",
// Word will parse "€180,000.00 EUR" as a number
if (model.fldName.equals("DOCPROPERTY")
|| model.fldName.equals("MERGEFIELD")) {

// First, parse the value
NumberExtractor nex = new NumberExtractor(decimalSymbol);
Expand All @@ -355,14 +390,17 @@ private static double getNumber( FldSimpleModel model, String value) throws Fiel
// There is no number in this string.
// In this case Word just inserts the non-numeric text,
// without attempting to format the number
throw new FieldResultIsNotANumberException();
}

try {
return Double.parseDouble(value);
} catch (Exception e) {
throw new FieldResultIsNotANumberException();
throw new FieldResultIsNotANumberException("No number in " + value);
}
}

try {
return Double.parseDouble(value);
} catch (Exception e) {
// TODO: is it a bookmark?

throw new FieldResultIsNotANumberException();
}
}

private static String formatNumber( FldSimpleModel model, String wordNumberPattern, double dub)
Expand Down Expand Up @@ -401,6 +439,10 @@ private static String formatNumber( FldSimpleModel model, String wordNumberPatte
char ch = '\0';
char lastCh = '\0';
boolean inLiteral = false;

if ((wordNumberPattern == null) || (wordNumberPattern.length() == 0)) {
wordNumberPattern = "#.##########";
}

if ((wordNumberPattern != null) && (wordNumberPattern.length() > 0)) {

Expand Down Expand Up @@ -531,11 +573,47 @@ else if (ch=='-') {
appendDateItem(buffer, wordNumberPattern.substring(valueStart));
}

if (fillerBeforeDecimalPointCount<1) {

if (fillerBeforeDecimalPointCount<1
&& (wordNumberPattern != null) && (wordNumberPattern.length() > 0))
{
/*
* Word loses the negative sign!
*
=-0.75 \# .###x
WORD: .75
=-.75 \# .###x
WORD: .75
=-0.75 \# .###
WORD: .75
=-.75 \# .###
WORD: .75
=-0.75 \# .000
WORD: .750
=-.75 \# .000
WORD: .750
* Word returns the fractional part only!
=95.4 \# .00
WORD: .40
=95.4 \# .##
WORD: .4
=95.4 \# $###.00
OK
*/


if ( (dub<0) // Word loses the negative sign!
|| (dub>=1) ) // Word returns the fractional part only!
throw new FieldFormattingException("Refusing to replicate Word anomolous result. ");

}

String javaFormatter = buffer.toString();
Expand Down Expand Up @@ -566,9 +644,15 @@ private static String formatGeneral( FldSimpleModel model, String format, String
// are not handled here. It is the responsibility of the calling code
// to handle these.

// Drop surrounding quotes. TODO improve this
if (value.startsWith("\"")) value = value.substring(1);
if (value.endsWith("\"")) value = value.substring(0, value.length()-1);
// FIXME since we don't know whether CHARFORMAT or MERGEFORMAT was actually
// present, we can't implement the following code which mirrors Word:
// if ( format.equals("")) {
// return "Error! Switch argument not specified.";
// }
// so:
if ( format.equals("")) {
return value;
}

// TODO: handle the SMALLCAPS exception!

Expand Down Expand Up @@ -614,6 +698,8 @@ private static String firstCap(String value) {
return "";
} else if (value.length()==1) {
return value.substring(0, 1).toUpperCase();
} else if (value.startsWith("\"")) {
return "\"" + value.substring(1, 2).toUpperCase() + value.substring(2).toLowerCase();
} else { // (value.length()>1)
return value.substring(0, 1).toUpperCase() + value.substring(1).toLowerCase();
}
Expand Down Expand Up @@ -871,6 +957,8 @@ public static String findFirstSwitchValue(String switchDef, List<String> fldPara
switchValue = getSwitchValue(pos + 1, fldParameters);
}
}
// switch was found, so return empty value
switchValue = switchValue==null ? "" : switchValue;
}
return switchValue;
}
Expand Down
20 changes: 18 additions & 2 deletions src/main/java/org/docx4j/model/fields/NumberExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,24 @@ public String extractNumber(String string) throws java.lang.IllegalStateExceptio
try {
Matcher makeMatch = pattern.matcher(
prepare(string));
makeMatch.find();
return makeMatch.group();
if (makeMatch.find() ) {

String matchingSubstring = makeMatch.group();

// Check that there is no
// int pos = string.indexOf(matchingSubstring) + matchingSubstring.length()-1;
// int pos2 = string.indexOf(' ', pos);
//
// if (pos2>pos
// || (pos2<0 && (string.length() - pos > 0) )) {
// // there are alpha chars on the end of the number
// throw new java.lang.IllegalStateException("Not a number");
// }
return matchingSubstring;
} else {
throw new java.lang.IllegalStateException( string + " does not contain a number");
}

} catch (java.lang.IllegalStateException noMatch) {
// This is what Word does
return string;
Expand Down
22 changes: 21 additions & 1 deletion src/main/java/org/docx4j/model/fields/merge/MailMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,9 @@ private static List<Object> performOnInstance(WordprocessingMLPackage input,
fsm.build(instr);
val = FormattingSwitchHelper.applyFormattingSwitch(fsm, val);

gFormat = FormattingSwitchHelper.findFirstSwitchValue("\\*", fsm.getFldParameters(), true);
gFormat = FormattingSwitchHelper.findFirstSwitchValue("\\*", fsm.getFldParameters(), true);
// Solely for potential use in OutputField.AS_FORMTEXT_REGULAR
// We are in fact applying all formatting switches above.

} catch (TransformerException e) {
log.warn("Can't format the field", e);
Expand Down Expand Up @@ -606,6 +608,24 @@ private static String extractInstr(List<Object> instructions) {

if (instructions.size()!=1) {
log.error("TODO MERGEFIELD field contained complex instruction");
/* eg
*
* <w:r>
<w:instrText xml:space="preserve"> MERGEFIELD lasauv</w:instrText>
</w:r>
<w:r>
<w:instrText xml:space="preserve">egarde \* MERGEFORMAT </w:instrText>
</w:r>
for (Object i : instructions) {
i = XmlUtils.unwrap(i);
if (i instanceof Text) {
log.error( ((Text)i).getValue());
} else {
log.error(XmlUtils.marshaltoString(i, true, true) );
}
}
*/
return null;
}

Expand Down
Loading

0 comments on commit 6e72ff3

Please sign in to comment.