From 4181ccd0fa357565472d288dff4f365ba3244255 Mon Sep 17 00:00:00 2001 From: AnotherTest Date: Tue, 18 Aug 2020 07:24:42 +0430 Subject: [PATCH] Python Wrapper: Output json to file --- wrapper/python/nlex/wrap/wrapper.py | 5 +++++ wrapper/python/test1.py | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/wrapper/python/nlex/wrap/wrapper.py b/wrapper/python/nlex/wrap/wrapper.py index 22634f2..a6f7bb6 100644 --- a/wrapper/python/nlex/wrap/wrapper.py +++ b/wrapper/python/nlex/wrap/wrapper.py @@ -204,6 +204,11 @@ def process_documents(self, ds, is_json=False, to_json=False, filename='-', clea 'tokens': list(x.desanitify(self) for x in self.tokens(clean)) }] }) + if isinstance(to_json, str): + with open(to_json, 'w', encoding='utf-8') as f: + json.dump(res, f) + return None + return json.dumps(res) if to_json else res def next_id(): diff --git a/wrapper/python/test1.py b/wrapper/python/test1.py index b269db6..b877657 100644 --- a/wrapper/python/test1.py +++ b/wrapper/python/test1.py @@ -3,7 +3,7 @@ import sys @nlex.NLexTokenizer -def tokenize(inp, process_docs): +def tokenize(inp, process_docs, outfile='output.json'): r""" # Emit a pure_normalise function that simply returns a normalised character option pure_normaliser on @@ -51,8 +51,8 @@ def tokenize(inp, process_docs): """ def read(x): with open(x, 'r+', encoding='utf-8') as f: - return json.load(f) - return process_docs(sum((read(x) for x in inp), []), to_json=True) + return f.read() + return process_docs(list(read(x) for x in inp), to_json=outfile, clean=False) if len(sys.argv) > 1: