Skip to content

Commit

Permalink
Python Wrapper: Output json to file
Browse files: browse the repository at this point in the history
  • Loading branch information
alimpfard committed Aug 18, 2020
1 parent 04425cf commit 4181ccd
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
5 changes: 5 additions & 0 deletions wrapper/python/nlex/wrap/wrapper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -204,6 +204,11 @@ def process_documents(self, ds, is_json=False, to_json=False, filename='-', clea
'tokens': list(x.desanitify(self) for x in self.tokens(clean))
}]
})
if isinstance(to_json, str):
with open(to_json, 'w', encoding='utf-8') as f:
json.dump(res, f)
return None

return json.dumps(res) if to_json else res

def next_id():
Expand Down
6 changes: 3 additions & 3 deletions wrapper/python/test1.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys

@nlex.NLexTokenizer
def tokenize(inp, process_docs):
def tokenize(inp, process_docs, outfile='output.json'):
r"""
# Emit a pure_normalise function that simply returns a normalised character
option pure_normaliser on
Expand Down Expand Up @@ -51,8 +51,8 @@ def tokenize(inp, process_docs):
"""
def read(x):
with open(x, 'r+', encoding='utf-8') as f:
return json.load(f)
return process_docs(sum((read(x) for x in inp), []), to_json=True)
return f.read()
return process_docs(list(read(x) for x in inp), to_json=outfile, clean=False)


if len(sys.argv) > 1:
Expand Down

0 comments on commit 4181ccd

Please sign in to comment.