-
Notifications
You must be signed in to change notification settings - Fork 123
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
blocking tree changes #924
Conversation
common/core/src/main/java/zingg/common/core/block/blockingTree/DefaultBockingTreeBuilder.java
Fixed
Show resolved
Hide resolved
common/core/src/main/java/zingg/common/core/block/blockingTree/IBlockingTreeBuilder.java
Fixed
Show resolved
Hide resolved
...on/core/src/main/java/zingg/common/core/block/blockingTree/OptimizedBlockingTreeBuilder.java
Fixed
Show resolved
Hide resolved
...on/core/src/main/java/zingg/common/core/block/blockingTree/OptimizedBlockingTreeBuilder.java
Fixed
Show resolved
Hide resolved
...on/core/src/main/java/zingg/common/core/block/blockingTree/OptimizedBlockingTreeBuilder.java
Fixed
Show resolved
Hide resolved
...ore/src/main/java/zingg/spark/core/block/blockingTree/SparkOptimizedBlockingTreeBuilder.java
Fixed
Show resolved
Hide resolved
spark/core/src/main/java/zingg/spark/core/context/ZinggSparkContext.java
Fixed
Show fixed
Hide fixed
spark/core/src/test/java/zingg/spark/core/block/TestBlockingTreeUtil.java
Fixed
Show fixed
Hide fixed
spark/core/src/test/java/zingg/spark/core/block/TestBlockingTreeUtil.java
Fixed
Show resolved
Hide resolved
spark/core/src/test/java/zingg/spark/core/block/TestBlockingTreeUtil.java
Fixed
Show fixed
Hide fixed
common/core/src/main/java/zingg/common/core/block/DefaultHashFunctionUtility.java
Fixed
Show fixed
Hide fixed
common/core/src/main/java/zingg/common/core/block/DefaultHashFunctionUtility.java
Fixed
Show fixed
Hide fixed
common/core/src/main/java/zingg/common/core/block/DefaultHashFunctionUtility.java
Fixed
Show fixed
Hide fixed
common/core/src/test/java/zingg/common/core/block/dataUtility/CsvReader.java
Fixed
Show fixed
Hide fixed
common/core/src/test/java/zingg/common/core/block/dataUtility/CsvReader.java
Fixed
Show fixed
Hide fixed
common/core/src/test/java/zingg/common/core/block/dataUtility/CsvReader.java
Fixed
Show fixed
Hide fixed
common/core/src/test/java/zingg/common/core/block/dataUtility/DataReader.java
Fixed
Show fixed
Hide fixed
common/core/src/test/java/zingg/common/core/block/dataUtility/DataUtility.java
Fixed
Show fixed
Hide fixed
@@ -0,0 +1,12 @@ | |||
package zingg.common.core.block; | |||
|
|||
public class HashFunctionUtilityFactory { |
Check warning
Code scanning / PMD
This utility class has a non-private constructor Warning
long blockSize, ListMap<T, HashFunction<D,R,C,T>> hashFunctions, IArguments args) { | ||
ZFrame<D,R,C> sample = testData.sample(false, sampleFraction); | ||
long totalCount = sample.count(); | ||
if (blockSize == -1) blockSize = Heuristics.getMaxBlockSize(totalCount, args.getBlockSize()); |
Check warning
Code scanning / PMD
This statement should have braces Warning test
long totalCount = sample.count(); | ||
if (blockSize == -1) blockSize = Heuristics.getMaxBlockSize(totalCount, args.getBlockSize()); | ||
positives = positives.coalesce(1); | ||
Block<D,R,C,T> cblock = getBlock(sample, positives, hashFunctions, blockSize); |
Check warning
Code scanning / PMD
Consider simply returning the value vs storing it in local variable 'recordDF' Warning test
@@ -15,7 +22,18 @@ | |||
this.creator = creator; | |||
} | |||
|
|||
public List<? extends IFromCsv> getRecords(String file, boolean skipHeader) throws FileNotFoundException{ | |||
//default constructor | |||
public CsvReader() { |
Check warning
Code scanning / PMD
Document empty constructor Warning test
|
||
public List<String[]> readDataFromSource(String source) throws IOException, CsvException { | ||
CSVReader csvReader = getCSVReader(source); | ||
List<String[]> allData = csvReader.readAll(); |
Check warning
Code scanning / PMD
Consider simply returning the value vs storing it in local variable 'recordDF' Warning test
@@ -25,4 +43,12 @@ | |||
return records; | |||
} | |||
|
|||
private CSVReader getCSVReader(String source) throws IOException { | |||
FileReader filereader = new FileReader(source); | |||
CSVReader csvReader = new CSVReaderBuilder(filereader) |
Check warning
Code scanning / PMD
Consider simply returning the value vs storing it in local variable 'recordDF' Warning test
@@ -8,6 +8,7 @@ | |||
import zingg.common.client.ZFrame; | |||
import zingg.common.client.util.ListMap; | |||
import zingg.common.core.block.Block; | |||
import zingg.common.core.block.HashUtility; |
Check warning
Code scanning / PMD
Unused import 'zingg.common.client.util.*' Warning
@@ -25,4 +45,13 @@ | |||
return records; | |||
} | |||
|
|||
private CSVReader getCSVReader(String source) throws IOException, URISyntaxException { | |||
File file = new File(Objects.requireNonNull(this.getClass().getClassLoader().getResource(source)).toURI()); | |||
FileReader filereader = new FileReader(file); |
Check warning
Code scanning / PMD
Ensure that resources like this FileReader object are closed after use Warning test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please check this.
common/core/src/main/java/zingg/common/core/block/blockingTree/DefaultBockingTreeBuilder.java
Fixed
Show resolved
Hide resolved
|
||
import java.util.List; | ||
|
||
public abstract class DefaultBockingTreeBuilder<D, R, C, T> extends Block<D, R, C, T> implements IBlockingTreeBuilder<D, R, C, T> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the need for this class now? We already have the blocking tree util. Let us keep the interface but remove this class.
@@ -37,6 +40,7 @@ public Block(ZFrame<D,R,C> training, ZFrame<D,R,C> dupes) { | |||
this.training = training; | |||
this.dupes = dupes; | |||
childless = new ListMap<HashFunction<D,R,C,T>, String>(); | |||
hashFunctionsInCurrentNodePath = new HashSet<>(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Avoid the diamond operator (`<>`); spell out the type arguments explicitly, as on the line above.
@@ -42,7 +43,7 @@ public void execute() throws ZinggClientException { | |||
ZFrame<D,R,C> testData = getStopWords().preprocessForStopWords(testDataOriginal); | |||
|
|||
Tree<Canopy<R>> blockingTree = getBlockingTreeUtil().createBlockingTreeFromSample(testData, positives, 0.5, | |||
-1, args, getHashUtil().getHashFunctionList()); | |||
-1, args, getHashUtil().getHashFunctionList(), HashUtility.CACHED); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The caller doesn't need to know which blocking tree we are using. We do not want the phases to worry about this.
@@ -87,7 +88,7 @@ public void execute() throws ZinggClientException { | |||
|
|||
ZFrame<D,R,C> sample = getStopWords().preprocessForStopWords(sampleOrginal); | |||
|
|||
Tree<Canopy<R>> tree = getBlockingTreeUtil().createBlockingTree(sample, posPairs, 1, -1, args, getHashUtil().getHashFunctionList()); | |||
Tree<Canopy<R>> tree = getBlockingTreeUtil().createBlockingTree(sample, posPairs, 1, -1, args, getHashUtil().getHashFunctionList(), HashUtility.CACHED); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
see above. This code should not change.
} | ||
|
||
public Block(ZFrame<D,R,C> training, ZFrame<D,R,C> dupes) { | ||
this.hashFunctionUtility = HashFunctionUtilityFactory.getHashFunctionUtility(HashUtility.CACHED); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just call `this()` here?
private static final String DELIMITER = ":"; | ||
|
||
public CacheBasedHashFunctionUtility() { | ||
this.hashFunctionsInCurrentNodePath = new HashSet<>(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Avoid the diamond operator (`<>`).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
import java.net.URISyntaxException; | ||
import java.util.List; | ||
|
||
public interface DataReader { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The interface name should start with `I`; also move it to util.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
import java.io.FileNotFoundException; | ||
import java.util.List; | ||
|
||
public interface ICsvReader extends DataReader { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
move to util
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
blocking tree changes!
-> Added a path set in isHashFunctionUsed() to avoid a recursive call at each node.