Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

list_hdf(): Add return_types parameter #73

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 124 additions & 22 deletions h5io_browser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,13 @@ def delete_item(file_name: str, h5_path: str) -> None:


def list_hdf(
file_name: str, h5_path: str, recursive: Union[bool, int] = False
) -> Tuple[List[str], List[str]]:
file_name: str,
h5_path: str,
recursive: Union[bool, int] = False,
return_types: bool = False,
) -> Union[
Tuple[List[str], List[str]], Tuple[List[str], List[str], List[str], List[str]]
]:
"""
List HDF5 nodes and HDF5 groups of a given HDF5 file at a given h5_path

Expand All @@ -53,18 +58,29 @@ def list_hdf(
h5_path (str): Path to a group in the HDF5 file from where the data is read
recursive (bool/int): Recursively browse through the HDF5 file, either a boolean flag or an integer
which specifies the level of recursion.
return_types (bool): Return HDF5 data types - default False

Returns:
(list, list): list of HDF5 nodes and list of HDF5 groups
(list, list): list of HDF5 nodes and list of HDF5 groups, if return_types is set to False
(list, list, list, list): list of HDF5 nodes, list of HDF5 node types, list of HDF5 groups and list of HDF5 group
types, if return_types is set to True
"""
if os.path.exists(file_name):
with h5py.File(file_name, "r") as hdf:
try:
return _get_hdf_content(hdf=hdf[h5_path], recursive=recursive)
return _get_hdf_content(
hdf=hdf[h5_path], recursive=recursive, return_types=return_types
)
except KeyError:
return [], []
if return_types:
return [], [], [], []
else:
return [], []
else:
return [], []
if return_types:
return [], [], [], []
else:
return [], []


def read_dict_from_hdf(
Expand Down Expand Up @@ -405,17 +421,24 @@ def _write_hdf5_with_json_support(
) from None


def _list_h5path(hdf: Union[h5py.File, h5py.Group]) -> Tuple[List[str], List[str]]:
def _list_h5path(
hdf: Union[h5py.File, h5py.Group], return_types: bool = False
) -> Union[
Tuple[List[str], List[str]], Tuple[List[str], List[str], List[str], List[str]]
]:
"""
List groups and nodes in a given HDF5 path

Args:
hdf (h5py.File/h5py.Group): HDF5 pointer
return_types (bool): return HDF5 data types

Returns:
Tuple[List[str], List[str]]: list of groups and list of nodes
(list, list): list of HDF5 nodes and list of HDF5 groups, if return_types is set to False
(list, list, list, list): list of HDF5 nodes, list of HDF5 node types, list of HDF5 groups and list of HDF5 group
types, if return_types is set to True
"""
group_lst, nodes_lst = [], []
group_lst, nodes_lst, group_types_lst, nodes_types_lst = [], [], [], []
try:
for k in hdf.keys():
if isinstance(hdf[k], h5py.Group):
Expand All @@ -425,22 +448,47 @@ def _list_h5path(hdf: Union[h5py.File, h5py.Group]) -> Tuple[List[str], List[str
and group_attrs_dict["TITLE"] in H5IO_GROUP_TYPES
):
nodes_lst.append(hdf[k].name)
if return_types and "TITLE" in group_attrs_dict.keys():
nodes_types_lst.append(group_attrs_dict["TITLE"])
else:
nodes_types_lst.append(None)
else:
group_lst.append(hdf[k].name)
if return_types and "TITLE" in group_attrs_dict.keys():
group_types_lst.append(group_attrs_dict["TITLE"])
else:
group_types_lst.append(None)
else:
nodes_lst.append(hdf[k].name)
if return_types:
node_attrs_dict = hdf[k].attrs
if "TITLE" in node_attrs_dict.keys():
nodes_types_lst.append(node_attrs_dict["TITLE"])
else:
nodes_types_lst.append(None)
except (AttributeError, KeyError):
return [], []
if return_types:
return [], [], [], []
else:
return [], []
else:
return nodes_lst, group_lst
if return_types:
return nodes_lst, nodes_types_lst, group_lst, group_types_lst
else:
return nodes_lst, group_lst


def _get_hdf_content(
hdf: Union[h5py.File, h5py.Group],
recursive: Union[bool, int] = False,
only_groups: bool = False,
only_nodes: bool = False,
) -> Union[List[str], Tuple[List[str], List[str]]]:
return_types: bool = False,
) -> Union[
List[str],
Tuple[List[str], List[str]],
Tuple[List[str], List[str], List[str], List[str]],
]:
"""
Get all sub-groups of a given HDF5 path

Expand All @@ -450,6 +498,7 @@ def _get_hdf_content(
which specifies the level of recursion.
only_groups (bool): return only HDF5 groups
only_nodes (bool): return only HDF5 nodes
return_types (bool): return HDF5 data types

Returns:
list/(list, list): list of HDF5 groups or list of HDF5 nodes or tuple of both lists
Expand All @@ -470,24 +519,77 @@ def _get_hdf_content(
if recursive_flag:
if not isinstance(recursive, bool) and isinstance(recursive, int):
recursive -= 1
group_lst = []
nodes_lst, groups_to_iterate_lst = _list_h5path(hdf=hdf)
for group in groups_to_iterate_lst:
nodes, groups = _get_hdf_content(hdf=hdf[group], recursive=recursive)
group_lst, group_types_lst = [], []
if return_types:
(
nodes_lst,
nodes_types_lst,
groups_to_iterate_lst,
group_types_to_iterate_lst,
) = _list_h5path(hdf=hdf, return_types=return_types)
else:
nodes_lst, groups_to_iterate_lst = _list_h5path(
hdf=hdf, return_types=return_types
)
nodes_types_lst = []
group_types_to_iterate_lst = groups_to_iterate_lst
for group, group_type in zip(groups_to_iterate_lst, group_types_to_iterate_lst):
if return_types:
nodes, nodes_types, groups, group_types = _get_hdf_content(
hdf=hdf[group],
recursive=recursive,
return_types=return_types,
)
nodes_types_lst += nodes_types
group_types_lst += [group_type] + group_types
else:
nodes, groups = _get_hdf_content(
hdf=hdf[group],
recursive=recursive,
return_types=return_types,
)
nodes_lst += nodes
group_lst += [group] + groups
if only_groups:
return group_lst
if return_types:
return group_lst, group_types_lst
else:
return group_lst
elif only_nodes:
return nodes_lst
if return_types:
return nodes_lst, nodes_types_lst
else:
return nodes_lst
else:
return nodes_lst, group_lst
if return_types:
return nodes_lst, nodes_types_lst, group_lst, group_types_lst
else:
return nodes_lst, group_lst
elif only_groups:
return _list_h5path(hdf=hdf)[1]
if return_types:
_, _, group_lst, group_types_lst = _list_h5path(
hdf=hdf, return_types=return_types
)
return group_lst, group_types_lst
else:
return _list_h5path(hdf=hdf, return_types=return_types)[1]
elif only_nodes:
return _list_h5path(hdf=hdf)[0]
if return_types:
nodes_lst, nodes_types_lst, _, _ = _list_h5path(
hdf=hdf, return_types=return_types
)
return nodes_lst, nodes_types_lst
else:
return _list_h5path(hdf=hdf, return_types=return_types)[0]
else:
return _list_h5path(hdf=hdf)
if return_types:
nodes_lst, nodes_types_lst, group_lst, group_types_lst = _list_h5path(
hdf=hdf, return_types=return_types
)
return nodes_lst, nodes_types_lst, group_lst, group_types_lst
else:
nodes_lst, group_lst = _list_h5path(hdf=hdf, return_types=return_types)
return nodes_lst, group_lst


def _check_json_conversion(value: Any) -> Tuple[Any, bool]:
Expand Down
84 changes: 84 additions & 0 deletions tests/test_base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
from runpy import run_path

import numpy as np
import h5py
from unittest import TestCase
Expand Down Expand Up @@ -298,6 +300,88 @@ def test_list_groups(self):
with self.assertRaises(TypeError):
list_hdf(file_name=self.file_name, h5_path="/", recursive=1.0)

def test_list_group_types(self):
    """
    Check list_hdf() with return_types=True.

    Every listing is compared as one (nodes, node types, groups, group types)
    tuple. Covered are: the fixture path, a wrong path, a wrong file name, the
    root path, recursion limited to depth 1 and 2, unlimited recursion, and
    listing below the path ".../a". A float recursion level must raise a
    TypeError.
    """
    top_nodes = ["/data_hierarchical/a", "/data_hierarchical/b"]
    all_nodes = top_nodes + ["/data_hierarchical/c/d", "/data_hierarchical/c/e"]
    both_groups = ["/data_hierarchical", "/data_hierarchical/c"]
    nothing = ([], [], [], [])

    def listing(**kwargs):
        # Always request the type lists; normalise the result to a tuple so it
        # can be compared against the expected 4-tuple in a single assertion.
        return tuple(list_hdf(return_types=True, **kwargs))

    # Non-recursive listing at the fixture path
    self.assertEqual(
        listing(file_name=self.file_name, h5_path=self.h5_path),
        (top_nodes, ["json", "int"], ["/data_hierarchical/c"], [None]),
    )

    # A wrong path and a wrong file name both produce four empty lists
    self.assertEqual(
        listing(file_name=self.file_name, h5_path="/wrong_path"),
        nothing,
    )
    self.assertEqual(
        listing(file_name="empty.h5", h5_path=self.h5_path),
        nothing,
    )

    # Non-recursive listing at the root
    self.assertEqual(
        listing(file_name=self.file_name, h5_path="/"),
        ([], [], ["/data_hierarchical"], [None]),
    )

    # Integer recursion levels starting from the root
    self.assertEqual(
        listing(file_name=self.file_name, h5_path="/", recursive=1),
        (top_nodes, ["json", "int"], both_groups, [None, None]),
    )
    self.assertEqual(
        listing(file_name=self.file_name, h5_path="/", recursive=2),
        (all_nodes, ["json", "int", "int", "int"], both_groups, [None, None]),
    )

    # Unlimited recursion starting from the fixture path
    self.assertEqual(
        listing(file_name=self.file_name, h5_path=self.h5_path, recursive=True),
        (
            all_nodes,
            ["json", "int", "int", "int"],
            ["/data_hierarchical/c"],
            [None],
        ),
    )

    # Listing below ".../a" yields nothing
    self.assertEqual(
        listing(
            file_name=self.file_name,
            h5_path=posixpath.join(self.h5_path, "a"),
            recursive=True,
        ),
        nothing,
    )

    # A float is not accepted as recursion level
    with self.assertRaises(TypeError):
        list_hdf(file_name=self.file_name, h5_path="/", recursive=1.0)

def test_get_hdf_content(self):
with h5py.File(self.file_name, "r") as hdf:
nodes, groups = _get_hdf_content(
Expand Down
Loading