Skip to content

Commit

Permalink
Realigning to eXist-db standards
Browse files Browse the repository at this point in the history
  • Loading branch information
lcahlander committed Feb 27, 2022
1 parent c04bcc0 commit 1d18f57
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 67 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

<groupId>org.exist-db</groupId>
<artifactId>commons-text-functions</artifactId>
<version>1.2.0</version>
<version>1.3.0</version>

<name>Apache Commons Text Functions</name>
<description>This is a library function module for eXist-db that wraps Apache Commons Text, specifically its functions for finding the similarities and distances between strings</description>
Expand Down Expand Up @@ -54,7 +54,7 @@
<!-- used in the EXPath Package Descriptor -->
<package-name>https://exist-db.org/commons-text/lib</package-name>

<example.module.namespace>https://exist-db.org/commons-text/lib</example.module.namespace>
<example.module.namespace>https://exist-db.org/xquery/commons-text</example.module.namespace>
<example.module.java.classname>CommonsTextModule</example.module.java.classname>
</properties>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@
package org.exist.xquery.functions.commons.text;

import org.apache.commons.text.similarity.*;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;

import org.exist.dom.QName;
import org.exist.xquery.*;
import org.exist.xquery.value.*;

import java.util.Arrays;
Expand All @@ -39,85 +38,104 @@
public class CommonsTextFunctions extends BasicFunction {

private static final String COSINE_SIMILARITY = "cosine-similarity";
static final FunctionSignature FS_COSINE_SIMILARITY = functionSignature(
COSINE_SIMILARITY,

public final static FunctionSignature FNS_COSINE_SIMILARITY = new FunctionSignature(
new QName(COSINE_SIMILARITY, CommonsTextModule.NAMESPACE_URI, CommonsTextModule.PREFIX),
"Measures the cosine similarity between two strings.",
returns(Type.DOUBLE),
param("left", Type.STRING, "Left"),
param("right", Type.STRING, "Right"),
optParam("delimiter", Type.STRING, "The optional text delimiter. The default is a space.")
new SequenceType[]{
new FunctionParameterSequenceType("left", Type.STRING, Cardinality.EXACTLY_ONE, "The left string"),
new FunctionParameterSequenceType("right", Type.STRING, Cardinality.EXACTLY_ONE, "The right string"),
new FunctionParameterSequenceType("delimiter", Type.STRING, Cardinality.ZERO_OR_ONE, "The delimiter between the words")
},
new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE, "the similarity score")
);

private static final String COSINE_DISTANCE = "cosine-distance";
static final FunctionSignature FS_COSINE_DISTANCE = functionSignature(
COSINE_DISTANCE,
public final static FunctionSignature FNS_COSINE_DISTANCE = new FunctionSignature(
new QName(COSINE_DISTANCE, CommonsTextModule.NAMESPACE_URI, CommonsTextModule.PREFIX),
"Measures the cosine distance between two strings.",
returns(Type.DOUBLE),
param("left", Type.STRING, "Left"),
param("right", Type.STRING, "Right")
new SequenceType[]{
new FunctionParameterSequenceType("left", Type.STRING, Cardinality.EXACTLY_ONE, "The left string"),
new FunctionParameterSequenceType("right", Type.STRING, Cardinality.EXACTLY_ONE, "The right string")
},
new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE, "the distance score")
);

private static final String HAMMING_DISTANCE = "hamming-distance";
static final FunctionSignature FS_HAMMING_DISTANCE = functionSignature(
HAMMING_DISTANCE,
public final static FunctionSignature FNS_HAMMING_DISTANCE = new FunctionSignature(
new QName(HAMMING_DISTANCE, CommonsTextModule.NAMESPACE_URI, CommonsTextModule.PREFIX),
"Measures the hamming distance between two strings.",
returns(Type.INTEGER),
param("left", Type.STRING, "Left"),
param("right", Type.STRING, "Right")
new SequenceType[]{
new FunctionParameterSequenceType("left", Type.STRING, Cardinality.EXACTLY_ONE, "The left string"),
new FunctionParameterSequenceType("right", Type.STRING, Cardinality.EXACTLY_ONE, "The right string")
},
new FunctionReturnSequenceType(Type.INTEGER, Cardinality.EXACTLY_ONE, "the distance score")
);

private static final String JACCARD_SIMILARITY = "jaccard-similarity";
static final FunctionSignature FS_JACCARD_SIMILARITY = functionSignature(
JACCARD_SIMILARITY,
public final static FunctionSignature FNS_JACCARD_SIMILARITY = new FunctionSignature(
new QName(JACCARD_SIMILARITY, CommonsTextModule.NAMESPACE_URI, CommonsTextModule.PREFIX),
"Measures the Jaccard similarity between two strings.",
returns(Type.DOUBLE),
param("left", Type.STRING, "Left"),
param("right", Type.STRING, "Right")
new SequenceType[]{
new FunctionParameterSequenceType("left", Type.STRING, Cardinality.EXACTLY_ONE, "The left string"),
new FunctionParameterSequenceType("right", Type.STRING, Cardinality.EXACTLY_ONE, "The right string")
},
new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE, "the similarity score")
);

private static final String JACCARD_DISTANCE = "jaccard-distance";
static final FunctionSignature FS_JACCARD_DISTANCE = functionSignature(
JACCARD_DISTANCE,
public final static FunctionSignature FNS_JACCARD_DISTANCE = new FunctionSignature(
new QName(JACCARD_DISTANCE, CommonsTextModule.NAMESPACE_URI, CommonsTextModule.PREFIX),
"Measures the Jaccard distance between two strings.",
returns(Type.DOUBLE),
param("left", Type.STRING, "Left"),
param("right", Type.STRING, "Right")
new SequenceType[]{
new FunctionParameterSequenceType("left", Type.STRING, Cardinality.EXACTLY_ONE, "The left string"),
new FunctionParameterSequenceType("right", Type.STRING, Cardinality.EXACTLY_ONE, "The right string")
},
new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE, "the distance score")
);

private static final String JARO_WINKLER_SIMILARITY = "jaro-winkler-similarity";
static final FunctionSignature FS_JARO_WINKLER_SIMILARITY = functionSignature(
JARO_WINKLER_SIMILARITY,
public final static FunctionSignature FNS_JARO_WINKLER_SIMILARITY = new FunctionSignature(
new QName(JARO_WINKLER_SIMILARITY, CommonsTextModule.NAMESPACE_URI, CommonsTextModule.PREFIX),
"Measures the Jaro Winkler similarity between two strings.",
returns(Type.DOUBLE),
param("left", Type.STRING, "Left"),
param("right", Type.STRING, "Right")
new SequenceType[]{
new FunctionParameterSequenceType("left", Type.STRING, Cardinality.EXACTLY_ONE, "The left string"),
new FunctionParameterSequenceType("right", Type.STRING, Cardinality.EXACTLY_ONE, "The right string")
},
new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE, "the similarity score")
);

private static final String JARO_WINKLER_DISTANCE = "jaro-winkler-distance";
static final FunctionSignature FS_JARO_WINKLER_DISTANCE = functionSignature(
JARO_WINKLER_DISTANCE,
public final static FunctionSignature FNS_JARO_WINKLER_DISTANCE = new FunctionSignature(
new QName(JARO_WINKLER_DISTANCE, CommonsTextModule.NAMESPACE_URI, CommonsTextModule.PREFIX),
"Measures the Jaro Winkler distance between two strings.",
returns(Type.DOUBLE),
param("left", Type.STRING, "Left"),
param("right", Type.STRING, "Right")
new SequenceType[]{
new FunctionParameterSequenceType("left", Type.STRING, Cardinality.EXACTLY_ONE, "The left string"),
new FunctionParameterSequenceType("right", Type.STRING, Cardinality.EXACTLY_ONE, "The right string")
},
new FunctionReturnSequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE, "the distance score")
);

private static final String LONGEST_COMMON_SUBSEQUENCE = "longest-common-subsequence";
static final FunctionSignature FS_LONGEST_COMMON_SUBSEQUENCE = functionSignature(
LONGEST_COMMON_SUBSEQUENCE,
public final static FunctionSignature FNS_LONGEST_COMMON_SUBSEQUENCE = new FunctionSignature(
new QName(LONGEST_COMMON_SUBSEQUENCE, CommonsTextModule.NAMESPACE_URI, CommonsTextModule.PREFIX),
"Measures the longest common subsequence between two strings.",
returns(Type.INTEGER),
param("left", Type.STRING, "Left"),
param("right", Type.STRING, "Right")
new SequenceType[]{
new FunctionParameterSequenceType("left", Type.STRING, Cardinality.EXACTLY_ONE, "The left string"),
new FunctionParameterSequenceType("right", Type.STRING, Cardinality.EXACTLY_ONE, "The right string")
},
new FunctionReturnSequenceType(Type.INTEGER, Cardinality.EXACTLY_ONE, "the longest common subsequence")
);

private static final String LONGEST_COMMON_SUBSEQUENCE_DISTANCE = "longest-common-subsequence-distance";
static final FunctionSignature FS_LONGEST_COMMON_SUBSEQUENCE_DISTANCE = functionSignature(
LONGEST_COMMON_SUBSEQUENCE_DISTANCE,
public final static FunctionSignature FNS_LONGEST_COMMON_SUBSEQUENCE_DISTANCE = new FunctionSignature(
new QName(LONGEST_COMMON_SUBSEQUENCE_DISTANCE, CommonsTextModule.NAMESPACE_URI, CommonsTextModule.PREFIX),
"Measures the longest common subsequence distance between two strings.",
returns(Type.INTEGER),
param("left", Type.STRING, "Left"),
param("right", Type.STRING, "Right")
new SequenceType[]{
new FunctionParameterSequenceType("left", Type.STRING, Cardinality.EXACTLY_ONE, "The left string"),
new FunctionParameterSequenceType("right", Type.STRING, Cardinality.EXACTLY_ONE, "The right string")
},
new FunctionReturnSequenceType(Type.INTEGER, Cardinality.EXACTLY_ONE, "the longest common subsequence distance")
);

public CommonsTextFunctions(final XQueryContext context, final FunctionSignature signature) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,23 @@
*/
public class CommonsTextModule extends AbstractInternalModule {

public static final String NAMESPACE_URI = "https://exist-db.org/commons-text/lib";
public static final String PREFIX = "text";
public static final String NAMESPACE_URI = "https://exist-db.org/xquery/commons-text";
public static final String PREFIX = "commons-text";
public static final String RELEASED_IN_VERSION = "eXist-6.0.0";

// register the functions of the module
public static final FunctionDef[] functions = functionDefs(
FunctionDSL.functionDefs(CommonsTextFunctions.class,
CommonsTextFunctions.FS_COSINE_SIMILARITY,
CommonsTextFunctions.FS_COSINE_DISTANCE,
CommonsTextFunctions.FS_HAMMING_DISTANCE,
CommonsTextFunctions.FS_JACCARD_SIMILARITY,
CommonsTextFunctions.FS_JACCARD_DISTANCE,
CommonsTextFunctions.FS_JARO_WINKLER_SIMILARITY,
CommonsTextFunctions.FS_JARO_WINKLER_DISTANCE,
CommonsTextFunctions.FS_LONGEST_COMMON_SUBSEQUENCE,
CommonsTextFunctions.FS_LONGEST_COMMON_SUBSEQUENCE_DISTANCE
)
);
public static final FunctionDef[] functions = {
new FunctionDef(CommonsTextFunctions.FNS_COSINE_SIMILARITY, CommonsTextFunctions.class),
new FunctionDef(CommonsTextFunctions.FNS_COSINE_DISTANCE, CommonsTextFunctions.class),
new FunctionDef(CommonsTextFunctions.FNS_HAMMING_DISTANCE, CommonsTextFunctions.class),
new FunctionDef(CommonsTextFunctions.FNS_JACCARD_SIMILARITY, CommonsTextFunctions.class),
new FunctionDef(CommonsTextFunctions.FNS_JACCARD_DISTANCE, CommonsTextFunctions.class),
new FunctionDef(CommonsTextFunctions.FNS_JARO_WINKLER_SIMILARITY, CommonsTextFunctions.class),
new FunctionDef(CommonsTextFunctions.FNS_JARO_WINKLER_DISTANCE, CommonsTextFunctions.class),
new FunctionDef(CommonsTextFunctions.FNS_LONGEST_COMMON_SUBSEQUENCE, CommonsTextFunctions.class),
new FunctionDef(CommonsTextFunctions.FNS_LONGEST_COMMON_SUBSEQUENCE_DISTANCE, CommonsTextFunctions.class)

};

public CommonsTextModule(final Map<String, List<? extends Object>> parameters) {
super(functions, parameters);
Expand Down

0 comments on commit 1d18f57

Please sign in to comment.