Skip to content

Commit

Permalink
Merge pull request #92 from catsmith/json_format_addition
Browse files Browse the repository at this point in the history
addition JSON format which respects vertical layout argument
  • Loading branch information
rhdekker authored Mar 12, 2024
2 parents 65c8131 + 7e30140 commit fc1917b
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 3 deletions.
22 changes: 21 additions & 1 deletion collatex-pythonport/collatex/core_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ def collate(collation, output="table", layout="horizontal", segmentation=True, n
# create alignment table
table = AlignmentTable(collation, graph, layout, ranking)
if output == "json":
return export_alignment_table_as_json(table)
if layout == "vertical":
return export_alignment_table_as_vertical_json(table, collation)
else:
return export_alignment_table_as_json(table)
if output == "html":
return display_alignment_table_as_html(table)
if output == "html2":
Expand All @@ -86,6 +89,23 @@ def collate(collation, output="table", layout="horizontal", segmentation=True, n
raise Exception("Unknown output type: " + output)


def export_alignment_table_as_vertical_json(table, collation, indent=None):
    """Serialize an alignment table as JSON with one entry per column.

    This is the transposed ("vertical") counterpart of the row-per-witness
    JSON export: every entry of ``"table"`` corresponds to one column of
    *table* and holds one cell per witness of *collation*, in witness order.
    A cell is the list of ``token_data`` values of the tokens the witness has
    in that column, or ``None`` (JSON ``null``) when it has none.
    ``"witnesses"`` lists the witness sigla in the same order as the cells.

    :param table: object whose ``columns`` each expose a
        ``tokens_per_witness`` mapping keyed by witness sigil.
    :param collation: object whose ``witnesses`` each expose a ``sigil``.
    :param indent: forwarded to ``json.dumps``; ``None`` gives compact output.
    :return: the JSON document as a string (keys sorted, non-ASCII kept as-is).
    """
    witnesses = collation.witnesses
    # The sigla do not depend on the columns, so collect them once up front.
    # This also keeps "witnesses" populated when the table has no columns.
    # dict.fromkeys drops repeated sigla while preserving first-seen order.
    sigli = list(dict.fromkeys(witness.sigil for witness in witnesses))

    rows = []
    for column in table.columns:
        row = []
        for witness in witnesses:
            cell = column.tokens_per_witness.get(witness.sigil)
            # A missing or empty cell is exported as None rather than [].
            row.append([item.token_data for item in cell] if cell else None)
        rows.append(row)

    json_output = {"table": rows, "witnesses": sigli}
    return json.dumps(json_output, sort_keys=True, indent=indent, ensure_ascii=False)


def export_alignment_table_as_json(table, indent=None, status=False):
json_output = {"table": []}
sigli = []
Expand Down
31 changes: 31 additions & 0 deletions collatex-pythonport/tests/test_alignment_table_rendering.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,37 @@ def testJSONAlignmentTableRenderingNoSegmentation(self):
json_out = collate(collation, output="json", segmentation=False)
self.assertEqual(expected_output, json.loads(json_out))

def testJSONAlignmentTableRenderingNoSegmentationVertical(self):
    """Vertical JSON output has one entry per column, one cell per witness."""
    collation = Collation()
    for sigil, text in (
        ("A", "This very quick very quick brown wombat"),
        ("B", "That very quick brown koala"),
        ("C", "That very quick brown kangaroo"),
    ):
        collation.add_plain_witness(sigil, text)

    def cell(sigil, position, reading, last=False):
        # Build the single-token cell expected for one witness in one column.
        # In the expected data every token except a witness's final one
        # carries a trailing space in "t".
        return [{"_sigil": sigil, "_token_array_position": position,
                 "n": reading, "t": reading if last else reading + " "}]

    expected_output = {
        "table": [
            [cell("A", 0, "This"), cell("B", 8, "That"), cell("C", 14, "That")],
            # The repeated "very quick" in A has no counterpart in B or C.
            [cell("A", 1, "very"), None, None],
            [cell("A", 2, "quick"), None, None],
            [cell("A", 3, "very"), cell("B", 9, "very"), cell("C", 15, "very")],
            [cell("A", 4, "quick"), cell("B", 10, "quick"), cell("C", 16, "quick")],
            [cell("A", 5, "brown"), cell("B", 11, "brown"), cell("C", 17, "brown")],
            [cell("A", 6, "wombat", last=True),
             cell("B", 12, "koala", last=True),
             cell("C", 18, "kangaroo", last=True)],
        ],
        "witnesses": ["A", "B", "C"],
    }
    actual = json.loads(collate(collation, output="json", layout="vertical", segmentation=False))
    self.assertEqual(expected_output, actual)

def testColumnStatusInAlignmentTable(self):
collation = Collation()
collation.add_plain_witness("A", "The quick brown fox jumps over the dog.")
Expand Down
6 changes: 4 additions & 2 deletions docs/pythonport.md
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,8 @@ Pretty-printing should be used only for examination, and not for subsequent proc
#### JSON

Setting the `output` value to `"json"` produces JSON output. This is the most complete output format, and therefore a common choice for subsequent preprocessing.
By default, the output produced is equivalent to the horizontal alignment tables above. If the `layout` option is set to `vertical`, the export will be equivalent to the vertical alignment tables above. The latter produces a format which is structurally the same as the `json` output option from the Java
microservices version of CollateX. The only difference between the two is that the Python export represents empty cells as `null` (Python `None`), whereas the Java microservices use an empty array.

##### Script

Expand Down Expand Up @@ -681,7 +683,7 @@ print(alignment_table)

#### The `layout` parameter

The `layout` parameter controls whether table output is “horizontal” (which is the default) or “vertical”. It is relevant only for output types `table` and `html`. Otherwise it is ignored: `html2` output is always vertical, and the other output types are not tabular.
The `layout` parameter controls whether table output is “horizontal” (which is the default) or “vertical”. It is relevant only for output types `table`, `html` and `json`. Otherwise it is ignored: `html2` output is always vertical, and the other output types are not tabular.

#### The `indent` parameter

Expand All @@ -700,6 +702,6 @@ In the following table, possible values of the `output` parameter are listed in
**svg** | yes | yes | no | no
**xml** | yes | yes | no | no
**tei** | yes | yes | no | yes
**json** | yes | yes | no | no
**json** | yes | yes | yes | no

Recall that near matching is incompatible with segmentation, so `near_match=True` requires `segmentation=False`.

0 comments on commit fc1917b

Please sign in to comment.