-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #300 from metafacture/oersi-238
Flux commands for analyzing input data
- Loading branch information
Showing
8 changed files
with
511 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
metafix/src/main/java/org/metafacture/metafix/ListFixPaths.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
/* | ||
* Copyright 2023 Fabian Steeg, hbz | ||
* | ||
* Licensed under the Apache License, Version 2.0 the "License"; | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.metafacture.metafix; | ||
|
||
import org.metafacture.framework.FluxCommand; | ||
import org.metafacture.framework.StreamReceiver; | ||
import org.metafacture.framework.annotations.Description; | ||
import org.metafacture.framework.annotations.In; | ||
import org.metafacture.framework.annotations.Out; | ||
import org.metafacture.triples.AbstractTripleSort.Compare; | ||
|
||
/** | ||
* Provide a user-friendly way to list all paths available for processing in fix (see also {@link ListFixValues}). | ||
* | ||
* @author Fabian Steeg | ||
*/ | ||
@Description("Lists all paths found in the input records. These paths can be used in a Fix to address fields. Options: " + | ||
"`count` (output occurence frequency of each path, sorted by highest frequency first; default: `true`), " + | ||
"`template` (for formatting the internal triple structure; default: `${o}\t|\t${s}` if count is true, else `${s}`)" + | ||
"`index` (output individual repeated subfields and array elements with index numbers instead of '*'; default: `false`)") | ||
@In(StreamReceiver.class) | ||
@Out(String.class) | ||
@FluxCommand("list-fix-paths") | ||
public class ListFixPaths extends MetafixStreamAnalyzer { | ||
|
||
public ListFixPaths() { | ||
super("nothing()", Compare.PREDICATE); | ||
setIndex(false); | ||
} | ||
|
||
public void setIndex(final boolean index) { | ||
getFix().setEntityMemberName(index ? Metafix.DEFAULT_ENTITY_MEMBER_NAME : "*"); | ||
} | ||
|
||
public boolean getIndex() { | ||
return getFix().getEntityMemberName().equals(Metafix.DEFAULT_ENTITY_MEMBER_NAME); | ||
} | ||
} |
49 changes: 49 additions & 0 deletions
49
metafix/src/main/java/org/metafacture/metafix/ListFixValues.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/* | ||
* Copyright 2023 Fabian Steeg, hbz | ||
* | ||
* Licensed under the Apache License, Version 2.0 the "License"; | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.metafacture.metafix; | ||
|
||
import org.metafacture.framework.FluxCommand; | ||
import org.metafacture.framework.StreamReceiver; | ||
import org.metafacture.framework.annotations.Description; | ||
import org.metafacture.framework.annotations.In; | ||
import org.metafacture.framework.annotations.Out; | ||
import org.metafacture.triples.AbstractTripleSort.Compare; | ||
|
||
/** | ||
* Provide a user-friendly way to list all values for a given path (see {@link ListFixPaths}). | ||
* | ||
* @author Fabian Steeg | ||
*/ | ||
@Description("Lists all values found for the given path. The paths can be found using fix-list-paths. Options: " + | ||
"`count` (output occurence frequency of each value, sorted by highest frequency first; default: `true`)" + | ||
"`template` (for formatting the internal triple structure; default: `${o}\t|\t${s}` if count is true, else `${s}`)") | ||
@In(StreamReceiver.class) | ||
@Out(String.class) | ||
@FluxCommand("list-fix-values") | ||
public class ListFixValues extends MetafixStreamAnalyzer { | ||
|
||
public ListFixValues(final String path) { | ||
super(fix(path), Compare.OBJECT); | ||
} | ||
|
||
private static String fix(final String path) { | ||
return | ||
"copy_field(\"" + path + "\",\"value.$append\")\n" + | ||
"retain(\"value\")"; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
139 changes: 139 additions & 0 deletions
139
metafix/src/main/java/org/metafacture/metafix/MetafixStreamAnalyzer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
/* | ||
* Copyright 2023 Fabian Steeg, hbz | ||
* | ||
* Licensed under the Apache License, Version 2.0 the "License"; | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.metafacture.metafix; | ||
|
||
import org.metafacture.formatting.ObjectTemplate; | ||
import org.metafacture.framework.MetafactureException; | ||
import org.metafacture.framework.ObjectReceiver; | ||
import org.metafacture.framework.helpers.DefaultStreamPipe; | ||
import org.metafacture.mangling.StreamFlattener; | ||
import org.metafacture.triples.AbstractTripleSort.Compare; | ||
import org.metafacture.triples.AbstractTripleSort.Order; | ||
import org.metafacture.triples.StreamToTriples; | ||
import org.metafacture.triples.TripleCount; | ||
import org.metafacture.triples.TripleSort; | ||
|
||
import java.io.FileNotFoundException; | ||
|
||
/** | ||
* Superclass for Metafix-based analyzer modules based on triples (see {@link org.metafacture.framework.objects.Triple}). | ||
* | ||
* @author Fabian Steeg | ||
*/ | ||
/* package-private */ class MetafixStreamAnalyzer extends DefaultStreamPipe<ObjectReceiver<String>> { | ||
|
||
private static final String DEFAULT_COUNTED_TEMPLATE = "${o}\t|\t${s}"; | ||
private static final String DEFAULT_UNCOUNTED_TEMPLATE = "${s}"; | ||
|
||
private final Metafix fix; | ||
private boolean count = true; | ||
private final Compare countBy; | ||
private String template; | ||
|
||
/* package-private */ MetafixStreamAnalyzer(final String fix, final Compare countBy) { | ||
try { | ||
this.fix = new Metafix(fix); | ||
this.fix.setRepeatedFieldsToEntities(true); | ||
} | ||
catch (final FileNotFoundException e) { | ||
throw new MetafactureException(e); | ||
} | ||
this.countBy = countBy; | ||
} | ||
|
||
@Override | ||
protected void onSetReceiver() { | ||
template = template != null ? template : count ? DEFAULT_COUNTED_TEMPLATE : DEFAULT_UNCOUNTED_TEMPLATE; | ||
fix | ||
.setReceiver(new StreamFlattener()) | ||
.setReceiver(new StreamToTriples()) | ||
.setReceiver(tripleCount()) | ||
.setReceiver(tripleSort()) | ||
.setReceiver(new ObjectTemplate<>(template)) | ||
.setReceiver(getReceiver()); | ||
} | ||
|
||
private TripleCount tripleCount() { | ||
final TripleCount tripleCount = new TripleCount(); | ||
tripleCount.setCountBy(countBy); | ||
return tripleCount; | ||
} | ||
|
||
private TripleSort tripleSort() { | ||
final TripleSort tripleSort = new TripleSort(); | ||
tripleSort.setNumeric(count); | ||
tripleSort.setBy(count ? Compare.OBJECT : Compare.SUBJECT); | ||
tripleSort.setOrder(count ? Order.DECREASING : Order.INCREASING); | ||
return tripleSort; | ||
} | ||
|
||
@Override | ||
public void startRecord(final String identifier) { | ||
fix.startRecord(identifier); | ||
} | ||
|
||
@Override | ||
public void endRecord() { | ||
fix.endRecord(); | ||
} | ||
|
||
@Override | ||
public void startEntity(final String name) { | ||
fix.startEntity(name); | ||
} | ||
|
||
@Override | ||
public void endEntity() { | ||
fix.endEntity(); | ||
} | ||
|
||
@Override | ||
public void literal(final String name, final String value) { | ||
fix.literal(name, value); | ||
} | ||
|
||
@Override | ||
protected void onCloseStream() { | ||
fix.closeStream(); | ||
} | ||
|
||
@Override | ||
protected void onResetStream() { | ||
fix.resetStream(); | ||
} | ||
|
||
public void setCount(final boolean count) { | ||
this.count = count; | ||
} | ||
|
||
public boolean getCount() { | ||
return this.count; | ||
} | ||
|
||
public void setTemplate(final String template) { | ||
this.template = template; | ||
} | ||
|
||
public String getTemplate() { | ||
return this.template; | ||
} | ||
|
||
/* package-private */ Metafix getFix() { | ||
return this.fix; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.