optionMaster) {
+ this.optionMaster = optionMaster;
+ }
+
/**
* Parse a list of Zingg command line options.
*
@@ -249,12 +263,13 @@ public final static String getHelp() {
s.append("options\n");
int maxlo = 0;
- for (Option o: optionMaster.values()){
+ ClientOptions co = new ClientOptions();
+ for (Option o: co.optionMaster.values()){
maxlo=Math.max(maxlo,o.optionName.length());
}
int maxld = 0;
- for (Option o: optionMaster.values()){
+ for (Option o: co.optionMaster.values()){
maxld=Math.max(maxld,o.desc.length());
}
@@ -262,7 +277,7 @@ public final static String getHelp() {
formatBuilder.append("\t").append("%-").append(maxlo + 5).append("s").append(": ").append("%-").append(maxld + 5).append("s").append("\n");
String format = formatBuilder.toString();
- for (Option o: optionMaster.values()) {
+ for (Option o: co.optionMaster.values()) {
s.append(String.format(format,o.optionName, o.desc));
}
return s.toString();
@@ -284,9 +299,13 @@ public String getOptionValue(String a) {
return get(a).getValue();
//throw new IllegalArgumentException("Wrong argument");
}
-
-
-
-
+ /** Updates the value of an existing option; the call is a no-op when has(key) is false. */
+ public void setOptionValue(String key, String value) {
+ if (has(key)) {
+ OptionWithVal optionWithVal = get(key);
+ optionWithVal.setValue(value);
+ options.put(key, optionWithVal);
+ }
+ }
}
diff --git a/common/client/src/main/java/zingg/common/client/FieldDefUtil.java b/common/client/src/main/java/zingg/common/client/FieldDefUtil.java
new file mode 100644
index 000000000..c8b06a55f
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/FieldDefUtil.java
@@ -0,0 +1,30 @@
+package zingg.common.client;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ *
+ * Utility methods for filtering lists of FieldDefinition objects by match type.
+ *
+ */
+public class FieldDefUtil implements Serializable{
+
+ private static final long serialVersionUID = 1L;
+
+ public List<? extends FieldDefinition> getFieldDefinitionDontUse(List<? extends FieldDefinition> fieldDefinition) {
+ return fieldDefinition.stream()
+ .filter(x->x.matchType.contains(MatchType.DONT_USE))
+ .collect(Collectors.toList());
+ }
+
+ public List<? extends FieldDefinition> getFieldDefinitionToUse(List<? extends FieldDefinition> fieldDefinition) {
+ return fieldDefinition.stream()
+ .filter(x->!x.matchType.contains(MatchType.DONT_USE))
+ .collect(Collectors.toList());
+ }
+
+
+
+}
diff --git a/common/client/src/main/java/zingg/common/client/FieldDefinition.java b/common/client/src/main/java/zingg/common/client/FieldDefinition.java
index 314c6d868..676829a88 100644
--- a/common/client/src/main/java/zingg/common/client/FieldDefinition.java
+++ b/common/client/src/main/java/zingg/common/client/FieldDefinition.java
@@ -10,6 +10,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
@@ -22,6 +23,8 @@
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import com.fasterxml.jackson.databind.ser.std.StdSerializer;
+import zingg.common.client.cols.Named;
+
/**
* This class defines each field that we use in matching We can use this to
@@ -30,7 +33,7 @@
* @author sgoyal
*
*/
-public class FieldDefinition implements
+public class FieldDefinition implements Named,
Serializable {
private static final long serialVersionUID = 1L;
@@ -119,6 +122,21 @@ public void setFieldName(String fieldName) {
this.fieldName = fieldName;
}
+ @JsonIgnore
+ public boolean isDontUse() {
+ return (matchType != null && matchType.contains(MatchType.DONT_USE));
+ }
+
+ @Override
+ public String getName() {
+ return getFieldName();
+ }
+
+ @Override
+ public void setName(String name) {
+ setFieldName(name);
+ }
+
@Override
public int hashCode() {
final int prime = 31;
diff --git a/common/client/src/main/java/zingg/common/client/ILabelDataViewHelper.java b/common/client/src/main/java/zingg/common/client/ILabelDataViewHelper.java
index 89e2ae44f..6385bc7f0 100644
--- a/common/client/src/main/java/zingg/common/client/ILabelDataViewHelper.java
+++ b/common/client/src/main/java/zingg/common/client/ILabelDataViewHelper.java
@@ -8,7 +8,7 @@ public interface ILabelDataViewHelper {
List getClusterIds(ZFrame lines);
- List getDisplayColumns(ZFrame lines, IArguments args);
+// List getDisplayColumns(ZFrame lines, IArguments args);
ZFrame getCurrentPair(ZFrame lines, int index, List clusterIds, ZFrame clusterLines);
diff --git a/common/client/src/main/java/zingg/common/client/IZingg.java b/common/client/src/main/java/zingg/common/client/IZingg.java
index 5e77a04db..61bd8133e 100644
--- a/common/client/src/main/java/zingg/common/client/IZingg.java
+++ b/common/client/src/main/java/zingg/common/client/IZingg.java
@@ -1,17 +1,15 @@
package zingg.common.client;
-import zingg.common.client.license.IZinggLicense;
-
public interface IZingg {
- public void init(IArguments args, IZinggLicense license)
+ public void init(IArguments args, S session)
throws ZinggClientException;
public void execute() throws ZinggClientException;
public void cleanup() throws ZinggClientException;
- public ZinggOptions getZinggOptions();
+ //public ZinggOptions getZinggOptions();
public String getName();
diff --git a/common/client/src/main/java/zingg/common/client/IZinggFactory.java b/common/client/src/main/java/zingg/common/client/IZinggFactory.java
index 427cbf35d..02a4b8d9c 100644
--- a/common/client/src/main/java/zingg/common/client/IZinggFactory.java
+++ b/common/client/src/main/java/zingg/common/client/IZinggFactory.java
@@ -1,9 +1,9 @@
package zingg.common.client;
-import zingg.common.client.IZingg;
+import zingg.common.client.options.ZinggOption;
public interface IZinggFactory {
- public IZingg get(ZinggOptions z) throws InstantiationException, IllegalAccessException, ClassNotFoundException;
+ public IZingg get(ZinggOption z) throws InstantiationException, IllegalAccessException, ClassNotFoundException;
}
diff --git a/common/client/src/main/java/zingg/common/client/Samples.java b/common/client/src/main/java/zingg/common/client/Samples.java
index 1a74c3874..c93fa249a 100644
--- a/common/client/src/main/java/zingg/common/client/Samples.java
+++ b/common/client/src/main/java/zingg/common/client/Samples.java
@@ -1,3 +1,7 @@
+
+
+
+
package zingg.common.client;
import java.io.Serializable;
diff --git a/common/client/src/main/java/zingg/common/client/ZFrame.java b/common/client/src/main/java/zingg/common/client/ZFrame.java
index 1a0861917..b07a264c0 100644
--- a/common/client/src/main/java/zingg/common/client/ZFrame.java
+++ b/common/client/src/main/java/zingg/common/client/ZFrame.java
@@ -20,7 +20,7 @@ public interface ZFrame {
public ZFrame selectExpr(String... col);
public ZFrame distinct();
public List collectAsList();
- public List collectAsListOfStrings();
+ public List collectFirstColumn();
public ZFrame toDF(String[] cols);
public ZFrame toDF(String col1, String col2);
@@ -81,6 +81,9 @@ public interface ZFrame {
public ZFrame repartition(int num);
public ZFrame repartition(int num, C c);
+ public ZFrame repartition(int num,scala.collection.Seq partitionExprs);
+ public ZFrame repartition(scala.collection.Seq partitionExprs);
+
public ZFrame sample(boolean repartition, float num);
public ZFrame sample(boolean repartition, double num);
@@ -170,5 +173,10 @@ public interface ZFrame {
public ZFrame groupByCount(String groupByCol1, String groupByCol2, String countColName);
-
+ public ZFrame intersect(ZFrame other);
+
+ public C substr(C col, int startPos, int len);
+
+ public C gt(C column1, C column2);
+
}
diff --git a/common/client/src/main/java/zingg/common/client/ZSession.java b/common/client/src/main/java/zingg/common/client/ZSession.java
deleted file mode 100644
index 1b778bad7..000000000
--- a/common/client/src/main/java/zingg/common/client/ZSession.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package zingg.common.client;
-
-import zingg.common.client.license.IZinggLicense;
-
-public interface ZSession {
-
- public S getSession();
-
- public void setSession(S session);
-
- public IZinggLicense getLicense();
-
- public void setLicense(IZinggLicense license);
-
-
-}
diff --git a/common/client/src/main/java/zingg/common/client/ZinggOptions.java b/common/client/src/main/java/zingg/common/client/ZinggOptions.java
deleted file mode 100644
index 8c3d32173..000000000
--- a/common/client/src/main/java/zingg/common/client/ZinggOptions.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package zingg.common.client;
-
-import zingg.common.client.util.Util;
-
-public enum ZinggOptions {
-
- TRAIN("train"),
- MATCH("match"),
- TRAIN_MATCH("trainMatch"),
- FIND_TRAINING_DATA("findTrainingData"),
- LABEL("label"),
- LINK("link"),
- GENERATE_DOCS("generateDocs"),
- RECOMMEND("recommend"),
- UPDATE_LABEL("updateLabel"),
- FIND_AND_LABEL("findAndLabel"),
- ASSESS_MODEL("assessModel"),
- PEEK_MODEL("peekModel"),
- EXPORT_MODEL("exportModel"),
- APPROVE_CLUSTERS("approveClusters"),
- RUN_INCREMENTAL("runIncremental");
-
- private String value;
-
- ZinggOptions(String s) {
- this.value = s;
- }
-
- public static String[] getAllZinggOptions() {
- ZinggOptions[] zo = ZinggOptions.values();
- int i = 0;
- String[] s = new String[zo.length];
- for (ZinggOptions z: zo) {
- s[i++] = z.getValue();
- }
- return s;
- }
-
- public String getValue() {
- return value;
- }
-
- public static final ZinggOptions getByValue(String value){
- for (ZinggOptions zo: ZinggOptions.values()) {
- if (zo.value.equals(value)) return zo;
- }
- return null;
- }
-
- public static void verifyPhase(String phase) throws ZinggClientException {
- if (getByValue(phase) == null) {
- String message = "'" + phase + "' is not a valid phase. "
- + "Valid phases are: " + Util.join(getAllZinggOptions(), "|");
- throw new ZinggClientException(message);
- }
- }
-}
\ No newline at end of file
diff --git a/common/client/src/main/java/zingg/common/client/cols/FieldDefSelectedCols.java b/common/client/src/main/java/zingg/common/client/cols/FieldDefSelectedCols.java
new file mode 100644
index 000000000..af5f615a0
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/cols/FieldDefSelectedCols.java
@@ -0,0 +1,44 @@
+package zingg.common.client.cols;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import zingg.common.client.FieldDefinition;
+import zingg.common.client.MatchType;
+
+public class FieldDefSelectedCols extends SelectedCols {
+
+ protected FieldDefSelectedCols() {
+
+ }
+
+ public FieldDefSelectedCols(List<? extends FieldDefinition> fieldDefs, boolean showConcise) {
+ List colList = getColList(fieldDefs, showConcise);
+ setCols(colList);
+ }
+
+ protected List getColList(List<? extends FieldDefinition> fieldDefs) {
+ return getColList(fieldDefs,false);
+ }
+
+ protected List getColList(List<? extends FieldDefinition> fieldDefs, boolean showConcise) {
+ List namedList = new ArrayList();
+
+ for (FieldDefinition fieldDef : fieldDefs) {
+ if (showConcise && fieldDef.isDontUse()) {
+ continue;
+ }
+ namedList.add(fieldDef);
+ }
+ List stringList = convertNamedListToStringList(namedList);
+ return stringList;
+ }
+
+ protected List convertNamedListToStringList(List<? extends FieldDefinition> namedList) {
+ List stringList = new ArrayList();
+ for (FieldDefinition named : namedList) {
+ stringList.add(named.getName());
+ }
+ return stringList;
+ }
+}
\ No newline at end of file
diff --git a/common/client/src/main/java/zingg/common/client/cols/ISelectedCols.java b/common/client/src/main/java/zingg/common/client/cols/ISelectedCols.java
new file mode 100644
index 000000000..1d48fc945
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/cols/ISelectedCols.java
@@ -0,0 +1,16 @@
+package zingg.common.client.cols;
+
+import java.util.List;
+
+public interface ISelectedCols {
+
+ String[] getCols(List<? extends Named> n);
+
+ String[] getCols();
+
+ void setCols(List cols);
+
+ void setNamedCols(List<? extends Named> n);
+
+ void setStringCols(List cols);
+}
\ No newline at end of file
diff --git a/common/client/src/main/java/zingg/common/client/cols/Named.java b/common/client/src/main/java/zingg/common/client/cols/Named.java
new file mode 100644
index 000000000..1fbe2a0a6
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/cols/Named.java
@@ -0,0 +1,8 @@
+package zingg.common.client.cols;
+
+public interface Named {
+
+ String getName();
+
+ void setName(String name);
+}
\ No newline at end of file
diff --git a/common/client/src/main/java/zingg/common/client/cols/PredictionColsSelector.java b/common/client/src/main/java/zingg/common/client/cols/PredictionColsSelector.java
new file mode 100644
index 000000000..71baf980c
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/cols/PredictionColsSelector.java
@@ -0,0 +1,23 @@
+package zingg.common.client.cols;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import zingg.common.client.util.ColName;
+
+public class PredictionColsSelector extends SelectedCols {
+
+ public PredictionColsSelector() {
+
+ List cols = new ArrayList();
+ cols.add(ColName.ID_COL);
+ cols.add(ColName.COL_PREFIX + ColName.ID_COL);
+ cols.add(ColName.PREDICTION_COL);
+ cols.add(ColName.SCORE_COL);
+
+ setCols(cols);
+
+ }
+
+
+}
\ No newline at end of file
diff --git a/common/client/src/main/java/zingg/common/client/cols/SelectedCols.java b/common/client/src/main/java/zingg/common/client/cols/SelectedCols.java
new file mode 100644
index 000000000..106afa534
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/cols/SelectedCols.java
@@ -0,0 +1,37 @@
+package zingg.common.client.cols;
+
+import java.util.List;
+
+public class SelectedCols implements ISelectedCols {
+
+ private String[] cols;
+
+ @Override
+ public String[] getCols(List<? extends Named> n) {
+ String[] result = new String[n.size()];
+ for (int i = 0; i < n.size(); i++) {
+ result[i] = n.get(i).getName();
+ }
+ return result;
+ }
+
+ @Override
+ public String[] getCols() {
+ return cols;
+ }
+
+ @Override
+ public void setCols(List strings) {
+ this.cols = strings.toArray(new String[0]);
+ }
+
+ @Override
+ public void setNamedCols(List<? extends Named> n) {
+ this.cols = getCols(n);
+ }
+
+ @Override
+ public void setStringCols(List columnNames) {
+ this.cols = columnNames.toArray(new String[0]);
+ }
+}
\ No newline at end of file
diff --git a/common/client/src/main/java/zingg/common/client/cols/ZidAndFieldDefSelector.java b/common/client/src/main/java/zingg/common/client/cols/ZidAndFieldDefSelector.java
new file mode 100644
index 000000000..62f5aac70
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/cols/ZidAndFieldDefSelector.java
@@ -0,0 +1,24 @@
+package zingg.common.client.cols;
+
+import java.util.List;
+
+import zingg.common.client.FieldDefinition;
+import zingg.common.client.util.ColName;
+
+public class ZidAndFieldDefSelector extends FieldDefSelectedCols {
+
+ public ZidAndFieldDefSelector(List<? extends FieldDefinition> fieldDefs) {
+ this(fieldDefs, true, false);
+ }
+
+ public ZidAndFieldDefSelector(List<? extends FieldDefinition> fieldDefs, boolean includeZid, boolean showConcise) {
+ List colList = getColList(fieldDefs, showConcise);
+
+ if (includeZid) colList.add(0, ColName.ID_COL);
+
+ colList.add(ColName.SOURCE_COL);
+
+ setCols(colList);
+ }
+
+}
\ No newline at end of file
diff --git a/common/client/src/main/java/zingg/common/client/event/events/DataCountEvent.java b/common/client/src/main/java/zingg/common/client/event/events/DataCountEvent.java
new file mode 100644
index 000000000..667364863
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/event/events/DataCountEvent.java
@@ -0,0 +1,6 @@
+package zingg.common.client.event.events;
+
+public class DataCountEvent extends IEvent{
+
+ public static final String INPUT_DATA_COUNT = "INPUT_DATA_COUNT";
+}
diff --git a/common/client/src/main/java/zingg/common/client/event/events/IEvent.java b/common/client/src/main/java/zingg/common/client/event/events/IEvent.java
new file mode 100644
index 000000000..6fe90d0f2
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/event/events/IEvent.java
@@ -0,0 +1,25 @@
+package zingg.common.client.event.events;
+
+import java.util.HashMap;
+
+public class IEvent {
+
+ protected HashMap eventDataProps;
+
+ public IEvent() {
+ super();
+ }
+
+ public IEvent(HashMap eventDataProps) {
+ super();
+ this.eventDataProps = eventDataProps;
+ }
+
+ public HashMap getProps(){
+ return eventDataProps;
+ }
+
+ public void setProps(HashMap props){
+ this.eventDataProps = props;
+ }
+}
diff --git a/common/client/src/main/java/zingg/common/client/event/events/ZinggStartEvent.java b/common/client/src/main/java/zingg/common/client/event/events/ZinggStartEvent.java
new file mode 100644
index 000000000..40c15775f
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/event/events/ZinggStartEvent.java
@@ -0,0 +1,5 @@
+package zingg.common.client.event.events;
+
+public class ZinggStartEvent extends IEvent{
+
+}
diff --git a/common/client/src/main/java/zingg/common/client/event/events/ZinggStopEvent.java b/common/client/src/main/java/zingg/common/client/event/events/ZinggStopEvent.java
new file mode 100644
index 000000000..dedeb37bd
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/event/events/ZinggStopEvent.java
@@ -0,0 +1,5 @@
+package zingg.common.client.event.events;
+
+public class ZinggStopEvent extends IEvent{
+
+}
diff --git a/common/client/src/main/java/zingg/common/client/event/listeners/EventsListener.java b/common/client/src/main/java/zingg/common/client/event/listeners/EventsListener.java
new file mode 100644
index 000000000..df4bd73a6
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/event/listeners/EventsListener.java
@@ -0,0 +1,39 @@
+package zingg.common.client.event.listeners;
+
+import java.util.List;
+
+import zingg.common.client.event.events.IEvent;
+import zingg.common.client.util.ListMap;
+
+public class EventsListener {
+ private static EventsListener _eventsListener = new EventsListener();
+ private final ListMap eventListenersList;
+
+ private EventsListener() {
+ eventListenersList = new ListMap();
+ }
+
+ public static EventsListener getInstance() {
+ return _eventsListener;
+ }
+
+ public void addListener(Class<? extends IEvent> eventClass, IEventListener listener) {
+ eventListenersList.add(eventClass.getCanonicalName(), listener);
+ }
+
+ public void fireEvent(IEvent event) {
+ listen(event);
+ }
+
+ private void listen(IEvent event) {
+ Class<? extends IEvent> eventClass = event.getClass();
+ List listenerList = eventListenersList.get(eventClass.getCanonicalName());
+ if (listenerList != null) {
+ for (IEventListener listener : listenerList) {
+ if (listener != null) {
+ listener.listen(event);
+ }
+ }
+ }
+ }
+}
diff --git a/common/client/src/main/java/zingg/common/client/event/listeners/IEventListener.java b/common/client/src/main/java/zingg/common/client/event/listeners/IEventListener.java
new file mode 100644
index 000000000..5f45e5082
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/event/listeners/IEventListener.java
@@ -0,0 +1,10 @@
+package zingg.common.client.event.listeners;
+
+import zingg.common.client.event.events.IEvent;
+
+public class IEventListener {
+
+ public void listen(IEvent event) {
+
+ }
+}
diff --git a/common/client/src/main/java/zingg/common/client/event/listeners/ZinggStartListener.java b/common/client/src/main/java/zingg/common/client/event/listeners/ZinggStartListener.java
new file mode 100644
index 000000000..06ed396c7
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/event/listeners/ZinggStartListener.java
@@ -0,0 +1,11 @@
+package zingg.common.client.event.listeners;
+
+import zingg.common.client.event.events.IEvent;
+
+public class ZinggStartListener extends IEventListener {
+
+ @Override
+ public void listen(IEvent event) {
+ }
+
+}
diff --git a/common/client/src/main/java/zingg/common/client/event/listeners/ZinggStopListener.java b/common/client/src/main/java/zingg/common/client/event/listeners/ZinggStopListener.java
new file mode 100644
index 000000000..9d161dfb9
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/event/listeners/ZinggStopListener.java
@@ -0,0 +1,10 @@
+package zingg.common.client.event.listeners;
+
+import zingg.common.client.event.events.IEvent;
+
+public class ZinggStopListener extends IEventListener {
+
+ @Override
+ public void listen(IEvent event) {
+ }
+}
diff --git a/common/client/src/main/java/zingg/common/client/license/ILicenseValidator.java b/common/client/src/main/java/zingg/common/client/license/ILicenseValidator.java
deleted file mode 100644
index 92fa47a37..000000000
--- a/common/client/src/main/java/zingg/common/client/license/ILicenseValidator.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package zingg.common.client.license;
-
-import java.util.Properties;
-
-public interface ILicenseValidator {
-
- public boolean validate();
-
- public Properties getLicenseProps();
-
- public void setLicenseProps(Properties licenseProps);
-
- public String getKey();
-
- public void setKey(String key);
-
- public String getValToCheck();
-
- public void setValToCheck(String valToCheck);
-
- public String getName();
-
- public void setName(String name);
-
-}
diff --git a/common/client/src/main/java/zingg/common/client/license/IZinggLicense.java b/common/client/src/main/java/zingg/common/client/license/IZinggLicense.java
deleted file mode 100644
index 761b5aedb..000000000
--- a/common/client/src/main/java/zingg/common/client/license/IZinggLicense.java
+++ /dev/null
@@ -1,11 +0,0 @@
-package zingg.common.client.license;
-
-import java.util.Properties;
-
-public interface IZinggLicense {
-
- public ILicenseValidator getValidator(String name);
-
- public Properties getLicenseProps();
-
-}
diff --git a/common/client/src/main/java/zingg/common/client/options/ZinggOption.java b/common/client/src/main/java/zingg/common/client/options/ZinggOption.java
new file mode 100644
index 000000000..2b3ba2999
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/options/ZinggOption.java
@@ -0,0 +1,19 @@
+package zingg.common.client.options;
+
+public class ZinggOption {
+ String name;
+
+ public ZinggOption(String name) {
+ this.name = name;
+ ZinggOptions.put(this);
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public String toString(){
+ return name;
+ }
+}
diff --git a/common/client/src/main/java/zingg/common/client/options/ZinggOptions.java b/common/client/src/main/java/zingg/common/client/options/ZinggOptions.java
new file mode 100644
index 000000000..d4c98ed1e
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/options/ZinggOptions.java
@@ -0,0 +1,66 @@
+package zingg.common.client.options;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import zingg.common.client.ZinggClientException;
+import zingg.common.client.util.Util;
+
+public class ZinggOptions {
+
+ public final static ZinggOption TRAIN = new ZinggOption("train");
+ public final static ZinggOption MATCH = new ZinggOption("match");
+ public final static ZinggOption TRAIN_MATCH = new ZinggOption("trainMatch");
+ public final static ZinggOption FIND_TRAINING_DATA = new ZinggOption("findTrainingData");
+ public final static ZinggOption LABEL = new ZinggOption("label");
+ public final static ZinggOption LINK = new ZinggOption("link");
+ public final static ZinggOption GENERATE_DOCS = new ZinggOption("generateDocs");
+ public final static ZinggOption RECOMMEND = new ZinggOption("recommend");
+ public final static ZinggOption UPDATE_LABEL = new ZinggOption("updateLabel");
+ public final static ZinggOption FIND_AND_LABEL = new ZinggOption("findAndLabel");
+ public final static ZinggOption ASSESS_MODEL = new ZinggOption("assessModel");
+ public final static ZinggOption PEEK_MODEL = new ZinggOption("peekModel");
+ public final static ZinggOption EXPORT_MODEL = new ZinggOption("exportModel");
+
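+ public static Map allZinggOptions; // deliberately no initializer: put() creates the map lazily while the constants above register themselves; an initializer here would run afterwards and discard them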
+
+
+
+ protected ZinggOptions() {
+ }
+
+ public static final void put(ZinggOption o) {
+ if (allZinggOptions == null) {
+ allZinggOptions = new HashMap();
+ }
+ allZinggOptions.put(o.getName(), o);
+ }
+
+
+
+ public static String[] getAllZinggOptions() {
+ ZinggOption[] zo = allZinggOptions.values().toArray(new ZinggOption[allZinggOptions.size()]);
+ int i = 0;
+ String[] s = new String[zo.length];
+ for (ZinggOption z: zo) {
+ s[i++] = z.getName();
+ }
+ return s;
+ }
+
+
+ public static final ZinggOption getByValue(String value){
+ for (ZinggOption zo: ZinggOptions.allZinggOptions.values()) {
+ if (zo.name.equals(value)) return zo;
+ }
+ return null;
+ }
+
+ public static void verifyPhase(String phase) throws ZinggClientException {
+ if (getByValue(phase) == null) {
+ String message = "'" + phase + "' is not a valid phase. "
+ + "Valid phases are: " + Util.join(getAllZinggOptions(), "|");
+ throw new ZinggClientException(message);
+ }
+ }
+}
\ No newline at end of file
diff --git a/common/client/src/main/java/zingg/common/client/util/DFObjectUtil.java b/common/client/src/main/java/zingg/common/client/util/DFObjectUtil.java
new file mode 100644
index 000000000..c0ae8bd89
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/util/DFObjectUtil.java
@@ -0,0 +1,17 @@
+package zingg.common.client.util;
+
+import java.util.List;
+
+import zingg.common.client.ZFrame;
+
+public abstract class DFObjectUtil {
+
+ protected final IWithSession iWithSession;
+
+ protected DFObjectUtil(IWithSession iWithSession) {
+ this.iWithSession = iWithSession;
+ }
+
+ public abstract ZFrame getDFFromObjectList(List objList, Class objClass) throws Exception;
+
+}
diff --git a/common/core/src/main/java/zingg/common/core/util/DFReader.java b/common/client/src/main/java/zingg/common/client/util/DFReader.java
similarity index 93%
rename from common/core/src/main/java/zingg/common/core/util/DFReader.java
rename to common/client/src/main/java/zingg/common/client/util/DFReader.java
index 6bf84940b..89f867752 100644
--- a/common/core/src/main/java/zingg/common/core/util/DFReader.java
+++ b/common/client/src/main/java/zingg/common/client/util/DFReader.java
@@ -1,4 +1,4 @@
-package zingg.common.core.util;
+package zingg.common.client.util;
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
diff --git a/common/core/src/main/java/zingg/common/core/util/DFWriter.java b/common/client/src/main/java/zingg/common/client/util/DFWriter.java
similarity index 87%
rename from common/core/src/main/java/zingg/common/core/util/DFWriter.java
rename to common/client/src/main/java/zingg/common/client/util/DFWriter.java
index c41e97196..9ddbfc88f 100644
--- a/common/core/src/main/java/zingg/common/core/util/DFWriter.java
+++ b/common/client/src/main/java/zingg/common/client/util/DFWriter.java
@@ -1,4 +1,4 @@
-package zingg.common.core.util;
+package zingg.common.client.util;
public interface DFWriter {
diff --git a/common/core/src/main/java/zingg/common/core/util/DSUtil.java b/common/client/src/main/java/zingg/common/client/util/DSUtil.java
similarity index 97%
rename from common/core/src/main/java/zingg/common/core/util/DSUtil.java
rename to common/client/src/main/java/zingg/common/client/util/DSUtil.java
index 6c6d0721b..f8d2f8108 100644
--- a/common/core/src/main/java/zingg/common/core/util/DSUtil.java
+++ b/common/client/src/main/java/zingg/common/client/util/DSUtil.java
@@ -1,4 +1,4 @@
-package zingg.common.core.util;
+package zingg.common.client.util;
import zingg.common.client.FieldDefinition;
@@ -7,8 +7,6 @@
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
import zingg.common.client.pipe.Pipe;
-import zingg.common.client.util.ColName;
-import zingg.common.client.util.ColValues;
import java.util.ArrayList;
import java.util.List;
@@ -43,7 +41,12 @@ public static final String[] getPrefixedColumns(String[] cols) {
}
public ZFrame getPrefixedColumnsDS(ZFrame lines) {
- return lines.toDF(getPrefixedColumns(lines.columns()));
+ try {
+ return lines.toDF(getPrefixedColumns(lines.columns()));
+ } catch (Exception e) {
+ LOG.error("Please ensure that the 'ftd' and 'label' processes are executed before initiating the training phase");
+ throw e;
+ }
}
diff --git a/common/client/src/main/java/zingg/common/client/util/IWithSession.java b/common/client/src/main/java/zingg/common/client/util/IWithSession.java
new file mode 100644
index 000000000..470405c38
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/util/IWithSession.java
@@ -0,0 +1,9 @@
+package zingg.common.client.util;
+
+public interface IWithSession {
+
+ public void setSession(S s);
+
+ public S getSession();
+
+}
\ No newline at end of file
diff --git a/common/client/src/main/java/zingg/common/client/util/JsonStringify.java b/common/client/src/main/java/zingg/common/client/util/JsonStringify.java
new file mode 100644
index 000000000..848155e83
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/util/JsonStringify.java
@@ -0,0 +1,27 @@
+package zingg.common.client.util;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import zingg.common.client.Arguments;
+import zingg.common.client.ArgumentsUtil;
+
+public class JsonStringify {
+ public static String toString (Object o){
+ ObjectMapper mapper = new ObjectMapper();
+ mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, true);
+ //mapper.configure(JsonParser.Feature.FAIL_ON_EMPTY_BEANS, true)
+ try {
+ StringWriter writer = new StringWriter();
+ return mapper.writeValueAsString(o);
+ } catch (IOException e) {
+ // serialization failed: print the stack trace and return null to the caller
+ e.printStackTrace();
+ return null;
+ }
+ }
+
+}
diff --git a/common/core/src/main/java/zingg/common/core/util/PipeUtil.java b/common/client/src/main/java/zingg/common/client/util/PipeUtil.java
similarity index 99%
rename from common/core/src/main/java/zingg/common/core/util/PipeUtil.java
rename to common/client/src/main/java/zingg/common/client/util/PipeUtil.java
index b76f8a371..415a4e36a 100644
--- a/common/core/src/main/java/zingg/common/core/util/PipeUtil.java
+++ b/common/client/src/main/java/zingg/common/client/util/PipeUtil.java
@@ -1,4 +1,4 @@
-package zingg.common.core.util;
+package zingg.common.client.util;
import java.util.Arrays;
import java.util.stream.Collectors;
@@ -12,7 +12,6 @@
import zingg.common.client.pipe.FilePipe;
//import zingg.common.client.pipe.InMemoryPipe;
import zingg.common.client.pipe.Pipe;
-import zingg.common.client.util.ColName;
//import com.datastax.spark.connector.cql.*;
//import org.elasticsearch.spark.sql.api.java.JavaEsSparkSQL;
@@ -185,7 +184,7 @@ public ZFrame read(boolean addExtraCol, boolean addLineNo, int numPartit
return rows;
}
- public void write(ZFrame toWriteOrig, IArguments args,
+ public void write(ZFrame toWriteOrig,
Pipe... pipes) throws ZinggClientException {
try {
for (Pipe p: pipes) {
diff --git a/common/core/src/main/java/zingg/common/core/util/PipeUtilBase.java b/common/client/src/main/java/zingg/common/client/util/PipeUtilBase.java
similarity index 93%
rename from common/core/src/main/java/zingg/common/core/util/PipeUtilBase.java
rename to common/client/src/main/java/zingg/common/client/util/PipeUtilBase.java
index bdb363a2b..b293d0b71 100644
--- a/common/core/src/main/java/zingg/common/core/util/PipeUtilBase.java
+++ b/common/client/src/main/java/zingg/common/client/util/PipeUtilBase.java
@@ -1,4 +1,4 @@
-package zingg.common.core.util;
+package zingg.common.client.util;
import zingg.common.client.IArguments;
import zingg.common.client.ZFrame;
@@ -29,7 +29,7 @@ public ZFrame read(boolean addLineNo, int numPartitions,
public ZFrame read(boolean addExtraCol, boolean addLineNo, int numPartitions,
boolean addSource, Pipe... pipes) throws ZinggClientException;
- public void write(ZFrame toWriteOrig, IArguments args, Pipe... pipes)
+ public void write(ZFrame toWriteOrig, Pipe... pipes)
throws ZinggClientException;
diff --git a/common/client/src/main/java/zingg/common/client/util/PojoToArrayConverter.java b/common/client/src/main/java/zingg/common/client/util/PojoToArrayConverter.java
new file mode 100644
index 000000000..a04e60b68
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/util/PojoToArrayConverter.java
@@ -0,0 +1,40 @@
+package zingg.common.client.util;
+
+import java.lang.reflect.Field;
+
+public class PojoToArrayConverter {
+ /** Returns the values of the fields declared on the object's class, followed by the values of the fields declared on its direct superclass. */
+ public static Object[] getObjectArray(Object object) throws IllegalAccessException {
+ Field[] fieldsInChildClass = object.getClass().getDeclaredFields();
+ Field[] fieldsInParentClass = null;
+
+ int fieldCountInChildClass = fieldsInChildClass.length;
+ int fieldCount = fieldCountInChildClass;
+ // only the immediate superclass is inspected; fields declared further up the hierarchy are not read
+ if (object.getClass().getSuperclass() != null) {
+ fieldCount += object.getClass().getSuperclass().getDeclaredFields().length;
+ fieldsInParentClass = object.getClass().getSuperclass().getDeclaredFields();
+ }
+
+ //fieldCount = fieldCountChild + fieldCountParent
+ Object[] objArr = new Object[fieldCount];
+
+ int idx = 0;
+ // idx is shared by both loops, so parent values are written right after the child values
+ //iterate through child class fields
+ for (; idx < fieldCountInChildClass; idx++) {
+ Field field = fieldsInChildClass[idx];
+ field.setAccessible(true);
+ objArr[idx] = field.get(object);
+ }
+
+ //iterate through super class fields
+ for (; idx < fieldCount; idx++) {
+ Field field = fieldsInParentClass[idx - fieldCountInChildClass];
+ field.setAccessible(true);
+ objArr[idx] = field.get(object);
+ }
+
+ return objArr;
+ }
+}
diff --git a/common/client/src/main/java/zingg/common/client/util/StructTypeFromPojoClass.java b/common/client/src/main/java/zingg/common/client/util/StructTypeFromPojoClass.java
new file mode 100644
index 000000000..4b3de89bb
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/util/StructTypeFromPojoClass.java
@@ -0,0 +1,34 @@
+package zingg.common.client.util;
+
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.List;
+/** Template for describing a POJO class as a struct: subclasses implement the struct, struct-field and type mappings. */
+public abstract class StructTypeFromPojoClass {
+ /** Builds the struct type for the given class; implemented by subclasses. */
+ public abstract ST getStructType(Class> objClass) throws Exception;
+ /** Converts the fields declared on objClass, then those declared on its direct superclass, with getStructField and returns them in that order. */
+ public List getFields(Class> objClass) {
+ List structFields = new ArrayList();
+ Field[] fields = objClass.getDeclaredFields();
+
+ //add child class fields in struct
+ for (Field f : fields) {
+ structFields.add(getStructField(f));
+ }
+
+ //add parent class fields in struct
+ if (objClass.getSuperclass() != null) {
+ Field[] fieldsSuper = objClass.getSuperclass().getDeclaredFields();
+ for (Field f : fieldsSuper) {
+ structFields.add(getStructField(f));
+ }
+ }
+ return structFields;
+ }
+ /** Converts one reflected field into a struct field; implemented by subclasses. */
+ public abstract SF getStructField(Field field);
+ /** Maps a Java class to a field type; implemented by subclasses. */
+ public abstract T getSFType(Class> t);
+
+}
diff --git a/common/client/src/main/java/zingg/common/client/util/WithSession.java b/common/client/src/main/java/zingg/common/client/util/WithSession.java
new file mode 100644
index 000000000..e3d0612b9
--- /dev/null
+++ b/common/client/src/main/java/zingg/common/client/util/WithSession.java
@@ -0,0 +1,15 @@
+package zingg.common.client.util;
+/** Simple holder that stores a session and exposes it through the overridden setSession/getSession accessors. */
+public class WithSession implements IWithSession {
+ // the stored session; declared without an access modifier, so it is package-visible
+ S session;
+ @Override
+ public void setSession(S session) {
+ this.session = session;
+ }
+
+ @Override
+ public S getSession() {
+ return session;
+ }
+}
diff --git a/common/client/src/test/java/zingg/common/client/TestArguments.java b/common/client/src/test/java/zingg/common/client/TestArguments.java
index 231464c44..0a75b4d2d 100644
--- a/common/client/src/test/java/zingg/common/client/TestArguments.java
+++ b/common/client/src/test/java/zingg/common/client/TestArguments.java
@@ -9,6 +9,7 @@
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -25,6 +26,7 @@ public class TestArguments {
public static final Log LOG = LogFactory.getLog(TestArguments.class);
protected ArgumentsUtil argsUtil = new ArgumentsUtil();
+
@Test
public void testSubstituteVariablesWithAllEnvVarSet() {
try {
@@ -242,8 +244,33 @@ public void testMatchTypeWrong() {
}
-
-
-
+ /** Round trip check: loads arguments from a JSON file, serialises them with toString(), parses that string again and compares selected properties of the two objects. */
+ @Test
+ public void testJsonStringify(){
+ IArguments argsFromJsonFile;
+ try{
+ //Converting to JSON using toString()
+ argsFromJsonFile = argsUtil.createArgumentsFromJSON(getClass().getResource("../../../testArguments/configWithMultipleMatchTypesUnsupported.json").getFile(), "test");
+ String strFromJsonFile = argsFromJsonFile.toString();
+ // parse the string form back into a second arguments object
+ IArguments argsFullCycle = argsUtil.createArgumentsFromJSONString(strFromJsonFile, "");
+
+ assertEquals(argsFullCycle.getFieldDefinition().get(0), argsFromJsonFile.getFieldDefinition().get(0));
+ assertEquals(argsFullCycle.getFieldDefinition().get(2), argsFromJsonFile.getFieldDefinition().get(2));
+ assertEquals(argsFullCycle.getModelId(), argsFromJsonFile.getModelId());
+ assertEquals(argsFullCycle.getZinggModelDir(), argsFromJsonFile.getZinggModelDir());
+ assertEquals(argsFullCycle.getNumPartitions(), argsFromJsonFile.getNumPartitions());
+ assertEquals(argsFullCycle.getLabelDataSampleSize() ,argsFromJsonFile.getLabelDataSampleSize());
+ assertEquals(argsFullCycle.getTrainingSamples(),argsFromJsonFile.getTrainingSamples());
+ assertEquals(argsFullCycle.getOutput(),argsFromJsonFile.getOutput());
+ assertEquals(argsFullCycle.getData(),argsFromJsonFile.getData());
+ assertEquals(argsFullCycle.getZinggDir(),argsFromJsonFile.getZinggDir());
+ assertEquals(argsFullCycle.getJobId(),argsFromJsonFile.getJobId());
+ // NOTE(review): the catch below only prints the stack trace, so a failure while loading or re-parsing does not fail this test - confirm the fixture loads and consider failing explicitly
+ } catch (Exception | ZinggClientException e) {
+ e.printStackTrace();
+ }
+
+ }
}
diff --git a/common/client/src/test/java/zingg/common/client/TestClient.java b/common/client/src/test/java/zingg/common/client/TestClient.java
index e22ff3c21..5a3befd85 100644
--- a/common/client/src/test/java/zingg/common/client/TestClient.java
+++ b/common/client/src/test/java/zingg/common/client/TestClient.java
@@ -6,6 +6,8 @@
import org.apache.commons.logging.LogFactory;
import org.junit.jupiter.api.Test;
+import zingg.common.client.options.ZinggOptions;
+
public class TestClient {
public static final Log LOG = LogFactory.getLog(TestClient.class);
diff --git a/common/client/src/test/java/zingg/common/client/TestFieldDefUtil.java b/common/client/src/test/java/zingg/common/client/TestFieldDefUtil.java
new file mode 100644
index 000000000..3d78d4618
--- /dev/null
+++ b/common/client/src/test/java/zingg/common/client/TestFieldDefUtil.java
@@ -0,0 +1,41 @@
+package zingg.common.client;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.jupiter.api.Test;
+
+/** Tests how FieldDefUtil splits the configured field definitions into "don't use" and "to use" lists. */
+public class TestFieldDefUtil {
+
+    public static final Log LOG = LogFactory.getLog(TestFieldDefUtil.class);
+    protected ArgumentsUtil argsUtil = new ArgumentsUtil();
+
+    protected FieldDefUtil fieldDefUtil = new FieldDefUtil();
+
+    /** Loads configTestDontUse.json and checks how many field definitions land in each list. */
+    @Test
+    public void testMatchTypeFilter() {
+        IArguments args;
+        try {
+            args = argsUtil.createArgumentsFromJSON(getClass().getResource("../../../testArguments/configTestDontUse.json").getFile(), "test");
+
+            // expected value goes first so a failing assertion reports expected/actual the right way round
+            List<? extends FieldDefinition> dontUseList = fieldDefUtil.getFieldDefinitionDontUse(args.getFieldDefinition());
+            assertEquals(3, dontUseList.size());
+
+            List<? extends FieldDefinition> matchList = fieldDefUtil.getFieldDefinitionToUse(args.getFieldDefinition());
+            assertEquals(4, matchList.size());
+
+        } catch (Exception | ZinggClientException e) {
+            // hand the cause to JUnit so the failure report carries the original stack trace
+            fail("Could not read config", e);
+        }
+
+    }
+
+}
diff --git a/common/client/src/test/java/zingg/common/client/util/TestStringRedactor.java b/common/client/src/test/java/zingg/common/client/util/TestStringRedactor.java
index 10220a5f1..07aff4f66 100644
--- a/common/client/src/test/java/zingg/common/client/util/TestStringRedactor.java
+++ b/common/client/src/test/java/zingg/common/client/util/TestStringRedactor.java
@@ -1,7 +1,5 @@
package zingg.common.client.util;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.stream.Stream;
@@ -10,14 +8,10 @@
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.Arguments;
import static org.junit.jupiter.params.provider.Arguments.arguments;
-import static org.junit.jupiter.api.Named.named;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
-import org.junit.jupiter.api.Test;
-
public class TestStringRedactor {
@ParameterizedTest(name="{0}")
diff --git a/common/client/src/test/resources/testArguments/configTestDontUse.json b/common/client/src/test/resources/testArguments/configTestDontUse.json
new file mode 100644
index 000000000..f1f1ed225
--- /dev/null
+++ b/common/client/src/test/resources/testArguments/configTestDontUse.json
@@ -0,0 +1,70 @@
+{
+ "fieldDefinition":[
+ {
+ "fieldName" : "fname",
+ "matchType" : "fuzzy,null_or_blank",
+ "fields" : "fname",
+ "dataType": "string"
+ },
+ {
+ "fieldName" : "lname",
+ "matchType" : "fuzzy",
+ "fields" : "lname",
+ "dataType": "string"
+ },
+ {
+ "fieldName" : "stNo",
+ "matchType": "exact",
+ "fields" : "stNo",
+ "dataType": "string"
+ },
+ {
+ "fieldName" : "add1",
+ "matchType": "fuzzy,dont_use",
+ "fields" : "add1",
+ "dataType": "string"
+ },
+ {
+ "fieldName" : "add2",
+ "matchType": "dont_use",
+ "fields" : "add2",
+ "dataType": "string"
+ },
+ {
+ "fieldName" : "city",
+ "matchType": "dont_use,fuzzy",
+ "fields" : "city",
+ "dataType": "string"
+ },
+ {
+ "fieldName" : "state",
+ "matchType": "fuzzy",
+ "fields" : "state",
+ "dataType": "string"
+ }
+ ],
+ "output" : [{
+ "name":"output",
+ "format":"csv",
+ "props": {
+ "location": "/tmp/zinggOutput",
+ "delimiter": ",",
+ "header":true
+ }
+ }],
+ "data" : [{
+ "name":"test",
+ "format":"csv",
+ "props": {
+ "location": "examples/febrl/test.csv",
+ "delimiter": ",",
+ "header":false
+ },
+ "schema": "id string, fname string, lname string, stNo string, add1 string, add2 string, city string, areacode string, state string, dob string, ssn string"
+ }],
+ "labelDataSampleSize" : 0.5,
+ "numPartitions":4,
+ "modelId": 100,
+ "zinggDir": "models"
+
+}
diff --git a/common/core/pom.xml b/common/core/pom.xml
index 926187b45..40d61e0c4 100644
--- a/common/core/pom.xml
+++ b/common/core/pom.xml
@@ -27,7 +27,41 @@
org.apache.httpcomponents
httpclient
- 4.5.2
+ 4.5.14
+
+ org.junit.jupiter
+ junit-jupiter-engine
+ 5.8.1
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-api
+ 5.8.1
+ test
+
+
+ org.junit.jupiter
+ junit-jupiter-params
+ 5.8.1
+ test
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+ 2.3.2
+
+
+
+ test-jar
+
+
+
+
+
+
diff --git a/common/core/src/main/java/zingg/common/core/Context.java b/common/core/src/main/java/zingg/common/core/Context.java
deleted file mode 100644
index d475708ee..000000000
--- a/common/core/src/main/java/zingg/common/core/Context.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package zingg.common.core;
-
-import java.io.Serializable;
-
-import zingg.common.client.ZinggClientException;
-import zingg.common.client.license.IZinggLicense;
-import zingg.common.core.util.BlockingTreeUtil;
-import zingg.common.core.util.DSUtil;
-import zingg.common.core.util.GraphUtil;
-import zingg.common.core.util.HashUtil;
-import zingg.common.core.util.ModelUtil;
-import zingg.common.core.util.PipeUtilBase;
-
-public interface Context extends Serializable {
-
- public HashUtil getHashUtil() ;
- public void setHashUtil(HashUtil t) ;
- public GraphUtil getGraphUtil() ;
-
- public void setGraphUtil(GraphUtil t) ;
-
- public void setModelUtil(ModelUtil t);
- public void setBlockingTreeUtil(BlockingTreeUtil t) ;
-
- public ModelUtil getModelUtil();
-
- public void setPipeUtil(PipeUtilBase pipeUtil);
- public void setDSUtil(DSUtil pipeUtil);
- public DSUtil getDSUtil() ;
- public PipeUtilBase getPipeUtil();
- public BlockingTreeUtil getBlockingTreeUtil() ;
-
- public void init(IZinggLicense license)
- throws ZinggClientException;
-
- public void cleanup();
-
- /**convenience method to set all utils
- * especially useful when you dont want to create the connection/spark context etc
- * */
- public void setUtils();
-
- public S getSession();
-
- public void setSession(S session);
-
-
- //public void initHashFns() throws ZinggClientException;
-
-
-
-
-
- }
-
-
-
-
diff --git a/common/core/src/main/java/zingg/common/core/block/Block.java b/common/core/src/main/java/zingg/common/core/block/Block.java
index 35bde6b54..0fdd3665b 100644
--- a/common/core/src/main/java/zingg/common/core/block/Block.java
+++ b/common/core/src/main/java/zingg/common/core/block/Block.java
@@ -13,10 +13,13 @@
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
import zingg.common.client.util.ListMap;
+import zingg.common.core.feature.FeatureFactory;
import zingg.common.core.hash.HashFunction;
public abstract class Block implements Serializable {
+ private static final long serialVersionUID = 1L;
+
public static final Log LOG = LogFactory.getLog(Block.class);
protected ZFrame dupes;
@@ -66,16 +69,13 @@ public void setDupes(ZFrame dupes) {
/**
* @return the types
*
- * public Class[] getTypes() { return types; }
*/
/**
* @param types
- * the types to set
- *
- * public void setTypes(Class[] types) { this.types = types; }
+ * the types to set
*
- * /**
+ *
* @return the maxSize
*/
public long getMaxSize() {
@@ -84,7 +84,7 @@ public long getMaxSize() {
/**
* @param maxSize
- * the maxSize to set
+ * the maxSize to set
*/
public void setMaxSize(long maxSize) {
this.maxSize = maxSize;
@@ -102,10 +102,13 @@ protected void setFunctionsMap(ListMap> m) {
this.functionsMap = m;
}
+ protected Canopy getCanopy(){ // creates the empty Canopy that getNodeFromCurrent copies the current node into
+ return new Canopy();
+ }
public CanopygetNodeFromCurrent(Canopynode, HashFunction function,
FieldDefinition context) {
- Canopytrial = new Canopy();
+ Canopytrial = getCanopy();
trial = node.copyTo(trial);
// node.training, node.dupeN, function, context);
trial.function = function;
@@ -113,23 +116,28 @@ protected void setFunctionsMap(ListMap> m) {
return trial;
}
- public abstract T getDataTypeFromString(String t);
+ public void estimateElimCount(Canopy c, long elimCount) { // NOTE(review): elimCount is not used here; getBestNode passes its current "least" - presumably meant for overriding implementations, confirm
+ c.estimateElimCount();
+ }
public CanopygetBestNode(Tree> tree, Canopyparent, Canopynode,
List fieldsOfInterest) throws Exception {
long least = Long.MAX_VALUE;
int maxElimination = 0;
Canopybest = null;
-
for (FieldDefinition field : fieldsOfInterest) {
- LOG.debug("Trying for " + field + " with data type " + field.getDataType() + " and real dt "
- + getDataTypeFromString(field.getDataType()));
+ if (LOG.isDebugEnabled()){
+ LOG.debug("Trying for " + field + " with data type " + field.getDataType() + " and real dt "
+ + getFeatureFactory().getDataTypeFromString(field.getDataType()));
+ }
//Class type = FieldClass.getFieldClassClass(field.getFieldClass());
FieldDefinition context = field;
if (least ==0) break;//how much better can it get?
// applicable functions
- List> functions = functionsMap.get(getDataTypeFromString(field.getDataType()));
- LOG.debug("functions are " + functions);
+ List> functions = functionsMap.get(getFeatureFactory().getDataTypeFromString(field.getDataType()));
+ if (LOG.isDebugEnabled()){
+ LOG.debug("functions are " + functions);
+ }
if (functions != null) {
@@ -140,11 +148,13 @@ protected void setFunctionsMap(ListMap> m) {
//!childless.contains(function, field.fieldName)
)
{
- LOG.debug("Evaluating field " + field.fieldName
+ if (LOG.isDebugEnabled()){
+ LOG.debug("Evaluating field " + field.fieldName
+ " and function " + function + " for " + field.dataType);
+ }
Canopytrial = getNodeFromCurrent(node, function,
context);
- trial.estimateElimCount();
+ estimateElimCount(trial, least);
long elimCount = trial.getElimCount();
@@ -178,7 +188,9 @@ protected void setFunctionsMap(ListMap> m) {
}*/
}
else {
- LOG.debug("No child " + function);
+ if (LOG.isDebugEnabled()){
+ LOG.debug("No child " + function);
+ }
//childless.add(function, field.fieldName);
}
@@ -392,7 +404,7 @@ public void printTree(Tree> tree,
}
}
-
+ public abstract FeatureFactory getFeatureFactory();
}
diff --git a/common/core/src/main/java/zingg/common/core/block/Canopy.java b/common/core/src/main/java/zingg/common/core/block/Canopy.java
index 25f0d4124..09451c56d 100644
--- a/common/core/src/main/java/zingg/common/core/block/Canopy.java
+++ b/common/core/src/main/java/zingg/common/core/block/Canopy.java
@@ -20,19 +20,19 @@ public class Canopy implements Serializable {
public static final Log LOG = LogFactory.getLog(Canopy.class);
// created by function edge leading from parent to this node
- HashFunction function;
+ protected HashFunction function;
// aplied on field
- FieldDefinition context;
+ protected FieldDefinition context;
// list of duplicates passed from parent
- List dupeN;
+ protected List dupeN;
// number of duplicates eliminated after function applied on fn context
- long elimCount;
+ protected long elimCount;
// hash of canopy
- Object hash;
+ protected Object hash;
// training set
- List training;
+ protected List training;
// duplicates remaining after function is applied
- List dupeRemaining;
+ protected List dupeRemaining;
public Canopy() {
}
diff --git a/common/core/src/main/java/zingg/common/core/context/Context.java b/common/core/src/main/java/zingg/common/core/context/Context.java
new file mode 100644
index 000000000..410e3ae3d
--- /dev/null
+++ b/common/core/src/main/java/zingg/common/core/context/Context.java
@@ -0,0 +1,89 @@
+package zingg.common.core.context;
+
+import java.io.Serializable;
+
+import zingg.common.client.ZinggClientException;
+import zingg.common.client.util.DSUtil;
+import zingg.common.client.util.PipeUtilBase;
+import zingg.common.core.util.BlockingTreeUtil;
+import zingg.common.core.util.GraphUtil;
+import zingg.common.core.util.HashUtil;
+import zingg.common.core.util.ModelUtil;
+/** Serializable base class holding a session and the pipe, hash, dataset, graph, model and blocking-tree utilities; subclasses implement init, cleanup and setUtils. */
+public abstract class Context implements Serializable {
+ protected S session;
+ protected PipeUtilBase pipeUtil;
+ protected HashUtil hashUtil;
+ protected DSUtil dsUtil;
+ protected GraphUtil graphUtil;
+ protected ModelUtil modelUtil;
+ protected BlockingTreeUtil blockingTreeUtil;
+ // file name constant; presumably the resource that lists hash functions - confirm where it is read
+ public static final String hashFunctionFile = "hashFunctions.json";
+
+ public HashUtil getHashUtil() {
+ return this.hashUtil;
+ }
+ public void setHashUtil(HashUtil t) {
+ this.hashUtil = t;
+ }
+ public GraphUtil getGraphUtil() {
+ return this.graphUtil;
+ }
+
+ public void setGraphUtil(GraphUtil t) {
+ this.graphUtil = t;
+ }
+
+ public void setModelUtil(ModelUtil t){
+ this.modelUtil = t;
+ }
+ public void setBlockingTreeUtil(BlockingTreeUtil t) {
+ this.blockingTreeUtil = t;
+ }
+
+ public ModelUtil getModelUtil(){
+ return this.modelUtil;
+ }
+
+ public void setPipeUtil(PipeUtilBase pipeUtil){
+ this.pipeUtil = pipeUtil;
+ }
+ public void setDSUtil(DSUtil d){
+ this.dsUtil = d;
+ }
+ public DSUtil getDSUtil() {
+ return this.dsUtil;
+ }
+ public PipeUtilBase getPipeUtil(){
+ return this.pipeUtil;
+ }
+ public BlockingTreeUtil getBlockingTreeUtil() {
+ return this.blockingTreeUtil;
+ }
+ /** Initialises this context with the given session; the work done is defined by the subclass. */
+ public abstract void init(S session)
+ throws ZinggClientException;
+ /** Cleanup hook; what gets released is defined by the subclass. */
+ public abstract void cleanup();
+
+ /** Convenience method to set all utils,
+ * especially useful when you don't want to create the connection/spark context etc.
+ * */
+ public abstract void setUtils();
+
+ public S getSession(){
+ return session;
+ }
+
+ public void setSession(S session){
+ this.session = session;
+ }
+
+
+
+ }
+
+
+
+
diff --git a/common/core/src/main/java/zingg/common/core/documenter/DataColDocumenter.java b/common/core/src/main/java/zingg/common/core/documenter/DataColDocumenter.java
index c227f5187..b69c32c80 100644
--- a/common/core/src/main/java/zingg/common/core/documenter/DataColDocumenter.java
+++ b/common/core/src/main/java/zingg/common/core/documenter/DataColDocumenter.java
@@ -6,7 +6,7 @@
import zingg.common.client.IArguments;
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
-import zingg.common.core.Context;
+import zingg.common.core.context.Context;
public abstract class DataColDocumenter extends DocumenterBase {
protected static String name = "zingg.DataColDocumenter";
diff --git a/common/core/src/main/java/zingg/common/core/documenter/DataDocumenter.java b/common/core/src/main/java/zingg/common/core/documenter/DataDocumenter.java
index 71737064d..0d88b1424 100644
--- a/common/core/src/main/java/zingg/common/core/documenter/DataDocumenter.java
+++ b/common/core/src/main/java/zingg/common/core/documenter/DataDocumenter.java
@@ -12,7 +12,7 @@
import zingg.common.client.IArguments;
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
-import zingg.common.core.Context;
+import zingg.common.core.context.Context;
public abstract class DataDocumenter extends DocumenterBase {
protected static String name = "zingg.DataDocumenter";
diff --git a/common/core/src/main/java/zingg/common/core/documenter/DocumenterBase.java b/common/core/src/main/java/zingg/common/core/documenter/DocumenterBase.java
index 0f891c839..59858bd0f 100644
--- a/common/core/src/main/java/zingg/common/core/documenter/DocumenterBase.java
+++ b/common/core/src/main/java/zingg/common/core/documenter/DocumenterBase.java
@@ -12,7 +12,7 @@
import zingg.common.client.IArguments;
import zingg.common.client.ZinggClientException;
import zingg.common.client.util.ColName;
-import zingg.common.core.Context;
+import zingg.common.core.context.Context;
import zingg.common.core.executor.ZinggBase;
public abstract class DocumenterBase extends ZinggBase{
diff --git a/common/core/src/main/java/zingg/common/core/documenter/ModelColDocumenter.java b/common/core/src/main/java/zingg/common/core/documenter/ModelColDocumenter.java
index 41d215e63..1bdfb2942 100644
--- a/common/core/src/main/java/zingg/common/core/documenter/ModelColDocumenter.java
+++ b/common/core/src/main/java/zingg/common/core/documenter/ModelColDocumenter.java
@@ -9,7 +9,7 @@
import zingg.common.client.IArguments;
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
-import zingg.common.core.Context;
+import zingg.common.core.context.Context;
public abstract class ModelColDocumenter extends DocumenterBase {
protected static String name = "zingg.ModelColDocumenter";
diff --git a/common/core/src/main/java/zingg/common/core/documenter/ModelDocumenter.java b/common/core/src/main/java/zingg/common/core/documenter/ModelDocumenter.java
index 75363e71c..67c0a7ef5 100644
--- a/common/core/src/main/java/zingg/common/core/documenter/ModelDocumenter.java
+++ b/common/core/src/main/java/zingg/common/core/documenter/ModelDocumenter.java
@@ -10,12 +10,14 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import zingg.common.client.FieldDefUtil;
+import zingg.common.client.FieldDefinition;
import zingg.common.client.IArguments;
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
import zingg.common.client.util.ColName;
import zingg.common.client.util.ColValues;
-import zingg.common.core.Context;
+import zingg.common.core.context.Context;
public abstract class ModelDocumenter extends DocumenterBase {
@@ -30,10 +32,13 @@ public abstract class ModelDocumenter extends DocumenterBase modelColDoc;
protected ZFrame markedRecords;
protected ZFrame unmarkedRecords;
+
+ protected FieldDefUtil fieldDefUtil;
public ModelDocumenter(Context context, IArguments args) {
super(context, args);
markedRecords = getDSUtil().emptyDataFrame();
+ fieldDefUtil = new FieldDefUtil();
}
public void process() throws ZinggClientException {
@@ -45,8 +50,9 @@ protected void createModelDocument() throws ZinggClientException {
try {
LOG.info("Model document generation starts");
- markedRecords = getMarkedRecords().sortAscending(ColName.CLUSTER_COLUMN);
- unmarkedRecords = getUnmarkedRecords().sortAscending(ColName.CLUSTER_COLUMN);
+ // drop columns which are don't use if show concise is true
+ markedRecords = filterForConcise(getMarkedRecords().sortAscending(ColName.CLUSTER_COLUMN));
+ unmarkedRecords = filterForConcise(getUnmarkedRecords().sortAscending(ColName.CLUSTER_COLUMN));
Map root = populateTemplateData();
writeModelDocument(root);
@@ -82,8 +88,7 @@ protected Map populateTemplateData() {
} else {
// fields required to generate basic document
- List columnList = args.getFieldDefinition().stream().map(fd -> fd.getFieldName())
- .collect(Collectors.toList());
+ List columnList = getColumnList();
root.put(TemplateFields.NUM_COLUMNS, columnList.size());
root.put(TemplateFields.COLUMNS, columnList.toArray());
root.put(TemplateFields.CLUSTERS, Collections.emptyList());
@@ -94,6 +99,31 @@ protected Map populateTemplateData() {
return root;
}
+ /** When args.getShowConcise() is true, drops from df the columns named after the field definitions returned by getFieldDefinitionDontUse (if any); otherwise returns df unchanged. */
+ protected ZFrame filterForConcise(ZFrame df) {
+ if (args.getShowConcise()) {
+ List dontUseFields = getFieldNames(
+ (List extends FieldDefinition>) fieldDefUtil.getFieldDefinitionDontUse(args.getFieldDefinition()));
+ if(!dontUseFields.isEmpty()) {
+ df = df.drop(dontUseFields.toArray(new String[dontUseFields.size()]));
+ }
+ }
+ return df;
+ }
+ /** Returns the field names to document: all configured fields, or only those from getFieldDefinitionToUse when showConcise is set. */
+ protected List getColumnList() {
+ List extends FieldDefinition> fieldList = args.getFieldDefinition();
+ // concise mode: switch to the subset returned by getFieldDefinitionToUse
+ if (args.getShowConcise()) {
+ fieldList = fieldDefUtil.getFieldDefinitionToUse(args.getFieldDefinition());
+ }
+ return getFieldNames(fieldList);
+ }
+ /** Collects the field name of every definition in the given list. */
+ protected List getFieldNames(List extends FieldDefinition> fieldList) {
+ return fieldList.stream().map(fd -> fd.getFieldName())
+ .collect(Collectors.toList());
+ }
private void putSummaryCounts(Map root) {
// Get the count if not empty
ZFrame markedRecordsPairSummary = markedRecords.groupByCount(ColName.MATCH_FLAG_COL, PAIR_WISE_COUNT);
diff --git a/common/core/src/main/java/zingg/common/core/executor/Documenter.java b/common/core/src/main/java/zingg/common/core/executor/Documenter.java
index 6e80b8aa7..2841720e5 100644
--- a/common/core/src/main/java/zingg/common/core/executor/Documenter.java
+++ b/common/core/src/main/java/zingg/common/core/executor/Documenter.java
@@ -4,7 +4,7 @@
import org.apache.commons.logging.LogFactory;
import zingg.common.client.ZinggClientException;
-import zingg.common.client.ZinggOptions;
+import zingg.common.client.options.ZinggOptions;
import zingg.common.core.documenter.DataDocumenter;
import zingg.common.core.documenter.ModelDocumenter;
@@ -14,7 +14,7 @@ public abstract class Documenter extends ZinggBase {
public static final Log LOG = LogFactory.getLog(Documenter.class);
public Documenter() {
- setZinggOptions(ZinggOptions.GENERATE_DOCS);
+ setZinggOption(ZinggOptions.GENERATE_DOCS);
}
public void execute() throws ZinggClientException {
diff --git a/common/core/src/main/java/zingg/common/core/executor/FindAndLabeller.java b/common/core/src/main/java/zingg/common/core/executor/FindAndLabeller.java
index e4e43109a..b8eb3eff0 100644
--- a/common/core/src/main/java/zingg/common/core/executor/FindAndLabeller.java
+++ b/common/core/src/main/java/zingg/common/core/executor/FindAndLabeller.java
@@ -5,8 +5,7 @@
import zingg.common.client.IArguments;
import zingg.common.client.ZinggClientException;
-import zingg.common.client.ZinggOptions;
-import zingg.common.client.license.IZinggLicense;
+import zingg.common.client.options.ZinggOptions;
public abstract class FindAndLabeller extends ZinggBase {
private static final long serialVersionUID = 1L;
@@ -17,14 +16,14 @@ public abstract class FindAndLabeller extends ZinggBase labeller;
public FindAndLabeller() {
- setZinggOptions(ZinggOptions.FIND_AND_LABEL);
+ setZinggOption(ZinggOptions.FIND_AND_LABEL);
}
@Override
- public void init(IArguments args, IZinggLicense license) throws ZinggClientException {
- finder.init(args, license);
- labeller.init(args, license);
- super.init(args, license);
+ public void init(IArguments args, S s) throws ZinggClientException {
+ finder.init(args,s);
+ labeller.init(args,s);
+ super.init(args,s);
}
@Override
diff --git a/common/core/src/main/java/zingg/common/core/executor/LabelDataViewHelper.java b/common/core/src/main/java/zingg/common/core/executor/LabelDataViewHelper.java
index 0c6024621..d5bd5970d 100644
--- a/common/core/src/main/java/zingg/common/core/executor/LabelDataViewHelper.java
+++ b/common/core/src/main/java/zingg/common/core/executor/LabelDataViewHelper.java
@@ -6,14 +6,12 @@
import org.apache.commons.logging.LogFactory;
import zingg.common.client.ClientOptions;
-import zingg.common.client.IArguments;
import zingg.common.client.ILabelDataViewHelper;
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
-import zingg.common.client.ZinggOptions;
import zingg.common.client.util.ColName;
import zingg.common.client.util.ColValues;
-import zingg.common.core.Context;
+import zingg.common.core.context.Context;
import zingg.common.core.util.LabelMatchType;
public class LabelDataViewHelper extends ZinggBase implements ILabelDataViewHelper {
@@ -21,9 +19,8 @@ public class LabelDataViewHelper extends ZinggBase imp
private static final long serialVersionUID = 1L;
public static final Log LOG = LogFactory.getLog(LabelDataViewHelper.class);
- public LabelDataViewHelper(Context context, ZinggOptions zinggOptions, ClientOptions clientOptions) {
+ public LabelDataViewHelper(Context context, ClientOptions clientOptions) {
setContext(context);
- setZinggOptions(zinggOptions);
setClientOptions(clientOptions);
setName(this.getClass().getName());
}
@@ -40,11 +37,11 @@ public List getClusterIds(ZFrame lines) {
}
- @Override
- public List getDisplayColumns(ZFrame lines, IArguments args) {
- return getDSUtil().getFieldDefColumns(lines, args, false, args.getShowConcise());
- }
-
+// @Override
+// public List getDisplayColumns(ZFrame lines, IArguments args) {
+// return getDSUtil().getFieldDefColumns(lines, args, false, args.getShowConcise());
+// }
+//
@Override
public ZFrame getCurrentPair(ZFrame lines, int index, List clusterIds, ZFrame clusterLines) {
@@ -127,5 +124,7 @@ public void execute() throws ZinggClientException {
public ILabelDataViewHelper getLabelDataViewHelper() throws UnsupportedOperationException {
return this;
}
+
+
}
diff --git a/common/core/src/main/java/zingg/common/core/executor/LabelUpdater.java b/common/core/src/main/java/zingg/common/core/executor/LabelUpdater.java
index 4e3365783..cb1fbe6e3 100644
--- a/common/core/src/main/java/zingg/common/core/executor/LabelUpdater.java
+++ b/common/core/src/main/java/zingg/common/core/executor/LabelUpdater.java
@@ -1,6 +1,5 @@
package zingg.common.core.executor;
-import java.util.List;
import java.util.Scanner;
import org.apache.commons.logging.Log;
@@ -8,7 +7,8 @@
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
-import zingg.common.client.ZinggOptions;
+import zingg.common.client.cols.ZidAndFieldDefSelector;
+import zingg.common.client.options.ZinggOptions;
import zingg.common.client.pipe.Pipe;
import zingg.common.client.util.ColName;
import zingg.common.core.util.LabelMatchType;
@@ -19,7 +19,7 @@ public abstract class LabelUpdater extends Labeller {
public static final Log LOG = LogFactory.getLog(LabelUpdater.class);
public LabelUpdater() {
- setZinggOptions(ZinggOptions.UPDATE_LABEL);
+ setZinggOption(ZinggOptions.UPDATE_LABEL);
}
public void execute() throws ZinggClientException {
@@ -125,14 +125,14 @@ protected ZFrame getUpdatedRecords(ZFrame updatedRecords, int
}
protected int getUserInput(ZFrame lines,ZFrame currentPair,String cluster_id) {
-
- List displayCols = getDSUtil().getFieldDefColumns(lines, args, false, args.getShowConcise());
-
+// List displayCols = getDSUtil().getFieldDefColumns(lines, args, false, args.getShowConcise());
+ ZidAndFieldDefSelector zidAndFieldDefSelector = new ZidAndFieldDefSelector(args.getFieldDefinition(), false, args.getShowConcise());
int matchFlag = currentPair.getAsInt(currentPair.head(),ColName.MATCH_FLAG_COL);
String preMsg = String.format("\n\tThe record pairs belonging to the input cluster id %s are:", cluster_id);
String matchType = LabelMatchType.get(matchFlag).msg;
String postMsg = String.format("\tThe above pair is labeled as %s\n", matchType);
- int selectedOption = displayRecordsAndGetUserInput(getDSUtil().select(currentPair, displayCols), preMsg, postMsg);
+// int selectedOption = displayRecordsAndGetUserInput(getDSUtil().select(currentPair, displayCols), preMsg, postMsg);
+ int selectedOption = displayRecordsAndGetUserInput(currentPair.select(zidAndFieldDefSelector.getCols()), preMsg, postMsg);
getTrainingDataModel().updateLabellerStat(selectedOption, INCREMENT);
getTrainingDataModel().updateLabellerStat(matchFlag, -1*INCREMENT);
getLabelDataViewHelper().printMarkedRecordsStat(
@@ -154,4 +154,4 @@ protected Pipe getOutputPipe() {
}
protected abstract Pipe setSaveModeOnPipe(Pipe p);
-}
\ No newline at end of file
+}
diff --git a/common/core/src/main/java/zingg/common/core/executor/Labeller.java b/common/core/src/main/java/zingg/common/core/executor/Labeller.java
index 7c9575c25..3c496445f 100644
--- a/common/core/src/main/java/zingg/common/core/executor/Labeller.java
+++ b/common/core/src/main/java/zingg/common/core/executor/Labeller.java
@@ -10,7 +10,8 @@
import zingg.common.client.ITrainingDataModel;
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
-import zingg.common.client.ZinggOptions;
+import zingg.common.client.cols.ZidAndFieldDefSelector;
+import zingg.common.client.options.ZinggOptions;
import zingg.common.client.util.ColName;
public abstract class Labeller extends ZinggBase {
@@ -24,7 +25,7 @@ public abstract class Labeller extends ZinggBase {
protected ILabelDataViewHelper labelDataViewHelper;
public Labeller() {
- setZinggOptions(ZinggOptions.LABEL);
+ setZinggOption(ZinggOptions.LABEL);
}
public void execute() throws ZinggClientException {
@@ -79,7 +80,8 @@ public ZFrame processRecordsCli(ZFrame lines) throws ZinggClientE
);
lines = lines.cache();
- List displayCols = getLabelDataViewHelper().getDisplayColumns(lines, args);
+// List displayCols = getLabelDataViewHelper().getDisplayColumns(lines, args);
+ ZidAndFieldDefSelector zidAndFieldDefSelector = new ZidAndFieldDefSelector(args.getFieldDefinition(), false, args.getShowConcise());
//have to introduce as snowframe can not handle row.getAs with column
//name and row and lines are out of order for the code to work properly
//snow getAsString expects row to have same struc as dataframe which is
@@ -104,7 +106,8 @@ public ZFrame processRecordsCli(ZFrame lines) throws ZinggClientE
msg2 = getLabelDataViewHelper().getMsg2(prediction, score);
//String msgHeader = msg1 + msg2;
- selectedOption = displayRecordsAndGetUserInput(getDSUtil().select(currentPair, displayCols), msg1, msg2);
+// selectedOption = displayRecordsAndGetUserInput(getDSUtil().select(currentPair, displayCols), msg1, msg2);
+ selectedOption = displayRecordsAndGetUserInput(currentPair.select(zidAndFieldDefSelector.getCols()), msg1, msg2);
getTrainingDataModel().updateLabellerStat(selectedOption, INCREMENT);
getLabelDataViewHelper().printMarkedRecordsStat(
getTrainingDataModel().getPositivePairsCount(),
@@ -158,7 +161,7 @@ int readCliInput() {
@Override
public ITrainingDataModel getTrainingDataModel() {
if (trainingDataModel==null) {
- this.trainingDataModel = new TrainingDataModel(getContext(), getZinggOptions(), getClientOptions());
+ this.trainingDataModel = new TrainingDataModel(getContext(), getClientOptions());
}
return trainingDataModel;
}
@@ -170,7 +173,7 @@ public void setTrainingDataModel(ITrainingDataModel trainingDataMode
@Override
public ILabelDataViewHelper getLabelDataViewHelper() {
if(labelDataViewHelper==null) {
- labelDataViewHelper = new LabelDataViewHelper(getContext(), getZinggOptions(), getClientOptions());
+ labelDataViewHelper = new LabelDataViewHelper(getContext(), getClientOptions());
}
return labelDataViewHelper;
}
diff --git a/common/core/src/main/java/zingg/common/core/executor/Linker.java b/common/core/src/main/java/zingg/common/core/executor/Linker.java
index 797bb59bc..c271a2161 100644
--- a/common/core/src/main/java/zingg/common/core/executor/Linker.java
+++ b/common/core/src/main/java/zingg/common/core/executor/Linker.java
@@ -5,36 +5,46 @@
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
-import zingg.common.client.ZinggOptions;
+import zingg.common.client.options.ZinggOptions;
import zingg.common.client.util.ColName;
-import zingg.common.client.util.ColValues;
+import zingg.common.core.filter.PredictionFilter;
+import zingg.common.core.pairs.SelfPairBuilderSourceSensitive;
public abstract class Linker