From a2181c70738e30f98a970e89bc82080c2bc95fba Mon Sep 17 00:00:00 2001 From: Cat Smith <910384+catsmith@users.noreply.github.com> Date: Thu, 8 Feb 2024 21:57:38 +0000 Subject: [PATCH 1/2] add a function for json export which respects the layout option and renders a vertical json table export which matches (with the exception of None/[] for empty cells) the json exported from the Java microservices, includes test --- .../collatex/core_functions.py | 22 ++++++++++++- .../tests/test_alignment_table_rendering.py | 31 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/collatex-pythonport/collatex/core_functions.py b/collatex-pythonport/collatex/core_functions.py index 2371b8e6f..9e686d004 100644 --- a/collatex-pythonport/collatex/core_functions.py +++ b/collatex-pythonport/collatex/core_functions.py @@ -69,7 +69,10 @@ def collate(collation, output="table", layout="horizontal", segmentation=True, n # create alignment table table = AlignmentTable(collation, graph, layout, ranking) if output == "json": - return export_alignment_table_as_json(table) + if layout == "vertical": + return export_alignment_table_as_vertical_json(table, collation) + else: + return export_alignment_table_as_json(table) if output == "html": return display_alignment_table_as_html(table) if output == "html2": @@ -86,6 +89,23 @@ def collate(collation, output="table", layout="horizontal", segmentation=True, n raise Exception("Unknown output type: " + output) +def export_alignment_table_as_vertical_json(table, collation, indent=None): + # print the table vertically + # switch columns and rows + json_output = {"table": []} + sigli = [] + for column in table.columns: + row = [] + for witness in collation.witnesses: + if witness.sigil not in sigli: + sigli.append(witness.sigil) + cell = column.tokens_per_witness.get(witness.sigil) + row.append([listItem.token_data for listItem in cell] if cell else None) + json_output["table"].append(row) + json_output["witnesses"] = sigli + 
return json.dumps(json_output, sort_keys=True, indent=indent, ensure_ascii=False) + + def export_alignment_table_as_json(table, indent=None, status=False): json_output = {"table": []} sigli = [] diff --git a/collatex-pythonport/tests/test_alignment_table_rendering.py b/collatex-pythonport/tests/test_alignment_table_rendering.py index 29d94d55e..2b666ab09 100644 --- a/collatex-pythonport/tests/test_alignment_table_rendering.py +++ b/collatex-pythonport/tests/test_alignment_table_rendering.py @@ -143,6 +143,37 @@ def testJSONAlignmentTableRenderingNoSegmentation(self): json_out = collate(collation, output="json", segmentation=False) self.assertEqual(expected_output, json.loads(json_out)) + def testJSONAlignmentTableRenderingNoSegmentationVertical(self): + collation = Collation() + collation.add_plain_witness("A", "This very quick very quick brown wombat") + collation.add_plain_witness("B", "That very quick brown koala") + collation.add_plain_witness("C", "That very quick brown kangaroo") + expected_output = {"table": [[[{"_sigil": "A", "_token_array_position": 0, "n": "This", "t": "This "}], + [{"_sigil": "B", "_token_array_position": 8, "n": "That", "t": "That "}], + [{"_sigil": "C", "_token_array_position": 14, "n": "That", "t": "That "}]], + [[{"_sigil": "A", "_token_array_position": 1, "n": "very", "t": "very "}], + None, + None], + [[{"_sigil": "A", "_token_array_position": 2, "n": "quick", "t": "quick "}], + None, + None], + [[{"_sigil": "A", "_token_array_position": 3, "n": "very", "t": "very "}], + [{"_sigil": "B", "_token_array_position": 9, "n": "very", "t": "very "}], + [{"_sigil": "C", "_token_array_position": 15, "n": "very", "t": "very "}]], + [[{"_sigil": "A", "_token_array_position": 4, "n": "quick", "t": "quick "}], + [{"_sigil": "B", "_token_array_position": 10, "n": "quick", "t": "quick "}], + [{"_sigil": "C", "_token_array_position": 16, "n": "quick", "t": "quick "}]], + [[{"_sigil": "A", "_token_array_position": 5, "n": "brown", "t": "brown "}], 
+ [{"_sigil": "B", "_token_array_position": 11, "n": "brown", "t": "brown "}], + [{"_sigil": "C", "_token_array_position": 17, "n": "brown", "t": "brown "}]], + [[{"_sigil": "A", "_token_array_position": 6, "n": "wombat", "t": "wombat"}], + [{"_sigil": "B", "_token_array_position": 12, "n": "koala", "t": "koala"}], + [{"_sigil": "C", "_token_array_position": 18, "n": "kangaroo", "t": "kangaroo"}]] + ], + "witnesses": ["A", "B", "C"]} + json_out = collate(collation, output="json", layout="vertical", segmentation=False) + self.assertEqual(expected_output, json.loads(json_out)) + def testColumnStatusInAlignmentTable(self): collation = Collation() collation.add_plain_witness("A", "The quick brown fox jumps over the dog.") From 7e301401e71acb8174207c533b497ab6759516bc Mon Sep 17 00:00:00 2001 From: Cat Smith <910384+catsmith@users.noreply.github.com> Date: Thu, 8 Feb 2024 22:07:36 +0000 Subject: [PATCH 2/2] update docs for new feature --- docs/pythonport.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/pythonport.md b/docs/pythonport.md index 746fff970..dfd85f4b6 100644 --- a/docs/pythonport.md +++ b/docs/pythonport.md @@ -522,6 +522,8 @@ Pretty-printing should be used only for examination, and not for subsequent proc #### JSON Setting the `output` value to `"json"` produces JSON output. This is the most complete output format, and therefore a common choice for subsequent preprocessing. +By default the output produced is equivalent to the horizontal alignment tables above. If the layout option is set to `vertical` then the export will be equivalent to the vertical alignment tables above. The latter produces a format which is structurally the same as the `json` output option from the Java +microservices version of CollateX. The only difference between the two is that in the Python export `null` (Python `None`) is used for empty cells and in the Java microservices an empty array is used. 
##### Script @@ -681,7 +683,7 @@ print(alignment_table) #### The `layout` parameter -The `layout` parameter controls whether table output is “horizontal” (which is the default) or “vertical”. It is relevant only for output types `table` and `html`. Otherwise it is ignored: `html2` output is always vertical, and the other output types are not tabular. +The `layout` parameter controls whether table output is “horizontal” (which is the default) or “vertical”. It is relevant only for output types `table`, `html` and `json`. Otherwise it is ignored: `html2` output is always vertical, and the other output types are not tabular. #### The `indent` parameter @@ -700,6 +702,6 @@ In the following table, possible values of the `output` parameter are listed in **svg** | yes | yes | no | no **xml** | yes | yes | no | no **tei** | yes | yes | no | yes -**json** | yes | yes | no | no +**json** | yes | yes | yes | no Recall that near matching is incompatible with segmentation, so `near_match=True` requires `segmentation=False`. \ No newline at end of file