Skip to content

Commit

Permalink
Build refinement tree more efficiently
Browse files Browse the repository at this point in the history
  • Loading branch information
JanCBrammer committed Dec 3, 2024
1 parent 45f0495 commit fa2c826
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 107 deletions.
106 changes: 3 additions & 103 deletions docs/refinement_tree.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -172,119 +172,19 @@
"source": [
"## Feedback on performance and correctness\n",
"\n",
"Currently (HEAD commit on `main` at September 17) `pytest tests/test_canonicalization.py::test_permutation_invariance` fails for 104 structures (exclusively timeout failures, no invariance failures):\n",
"Currently (HEAD commit on `main` at December 03) `pytest tests/test_canonicalization.py::test_permutation_invariance` fails for 5 structures (exclusively timeout failures, no invariance failures):\n",
"\n",
"```\n",
"====================================================== short test summary info ======================================================\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C28_H32_N8_Zn_B_F4_2_H2O_in_P21_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Paclitaxel] - Failed: Timeout >10.0s\n",
"================================================================================ short test summary info =================================================================================\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C28_H36_N8_Zn_C_F3_S_O3_2_C4_H8_O_in_Pca21] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[14Ethyl14methylheptadecanoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Isohexan] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[18Methylnonadecanoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[q17_a14sadm_in_Pbca] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[cyclophane2] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[tv027_0m_in_P2_1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[EMIM-BF4] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[ZnC20H34N6O4_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[n17_a96-1_in_P2_1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C12_H24_Cl2_N6_Zn_in_C2_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Mn2CO10] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Z 16 ethyl 15,17,17 trimethyloctadec9enoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[E 16,16,17,18 tetramethylnonadec9enoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[CHEMBL2348759] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C240] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[10tertButyl10isopropyltridecanoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[m18_a24alex_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C16_H22_N4_O6_S2_Zn_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C21_H42_Cl_N10_Zn_Cl_in_P21_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C16_H20_N4O_Zn_Cl2_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[q17_a37sadm_in_P1_New_P21] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C180] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Neohexan] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C60H60] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C36_H72_Cl4_N14_Zn2_0_5_CH3CN_2_5_Et2O_in_P-1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C18_H24_N4_O4_Zn_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C24_H48_N12_Zn_C_F3_S_O3_2_in_P-1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[n16_a100_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[CCDC2216842v3] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[3 Methylpentan] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[E 20 methyldocos13enoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[[8]cycloparaphenylene] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[2-8-dimethyldecane] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[TEMPO] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C70] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C18_H22_N4_O4_Zn_in_P-1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Os4CO16] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Streptomycin] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C60] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C14_H18_Cl2_N4_Zn_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C15_H18_Cl2_N4_Zn_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[n17_a41sad_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Oxo] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C21_H48_Cl_N10_Zn_S_O3_C_F3_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Zn_C30_H42_N8_S2_O6_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Neopentan] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[a15_a89a_in_P-1_New_I2_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[NH4PF6] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[n16_a19s_in_P-1_New_P21_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C22_H28_N4_O6_Zn_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[5tertButyl4isobutyl5isopropyl4methyloctanoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[rv160_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[2heptadecanoyloxy 3stearoyloxy propyl 17methyloctadecanoate] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Doxorubicin] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[tv241_0m_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[n16_a123_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[PH4PF6] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[3,7,11,15 tetramethylhexadecanoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[n17_b82sad_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[FeBr2TMGasme] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Ir4CO12] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[m17_a35sad_in_P2_1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[2palmitoyloxy3 14,15,16-trimethylheptadecanoyloxy propyl stearate] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[ro007_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[FeCl2TMG5NMe2asme] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[a15_a88c_in_P-1_New_P21_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Zn_Cl2_N6_C16_H28_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Z 21 methyldocos13enoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[decaline] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Peroxo] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C19_H24_N4_O4_Zn_0_13_H2O_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[2 4 Dimethylbutan] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[rv222_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[rv202_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[9Z,12Z,26Z 35 methylheptatriaconta9,12,26trienoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Icosanoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[CHEMBL415840] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[SbH4PF6] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[6,15Dimethyloctadecanoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[glycerol tristearate] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[a16_a07sada_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C80] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[insulin] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[n16_a77sad1_in_P2_1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C20_H26_N4_O5_Zn_0_27_H2O_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Fe3CO12] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C18_H26_N4_Zn_Cl2_in_P2_1_2_1_2_1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[rv006_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[9Z,12Z,36E octatriaconta9,12,36trienoic acid] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[NiCOD2] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C16_H30_N6_O4_Zn_in_P-1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Carboplatin] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[nHexan] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[adamantane] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[a1140_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[tv342_in_P1_New_P-1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[tv356_0m_in_P2_1_n] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[C18_H22_Cl2_N4_O2_Zn_in_P2_1_c] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[tv217_p1_in_P1_New_P-1] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[qv043_in_P2_1_New_Pca21] - Failed: Timeout >10.0s\n",
"FAILED tests/test_canonicalization.py::test_permutation_invariance[Vancomycin] - Failed: Timeout >10.0s\n",
"=========================================== 104 failed, 129 passed in 1199.82s (0:19:59) ============================================\n",
"======================================================================= 5 failed, 228 passed in 236.72s (0:03:56) ========================================================================\n",
"\n",
"```\n",
"\n",
"\n",
"The goal is to bring that number down by optimizing performance (e.g., additional pruning of refinement-tree, increasing efficiency of automorphism filter).\n",
"\n",
"For faster feedback run `pytest tests/test_canonicalization.py::test_permutation_invariance[<failed ID>]`."
Expand Down
12 changes: 8 additions & 4 deletions tucan/canonicalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ def get_refinement_tree_node_children(m: nx.Graph) -> Generator[nx.Graph, None,


def filter_out_automorphisms(ms: list[nx.Graph]) -> list[nx.Graph]:
# Caution: Mutates `ms` in-place.
filtered_ms = set()
labelings = set()

for m in ms:
m_relabeled_by_partition = nx.relabel_nodes(
m,
dict(zip(list(m), nx.get_node_attributes(m, PARTITION).values())),
copy=True,
)
labeling = tuple(
sorted([tuple(sorted(edge)) for edge in m_relabeled_by_partition.edges()])
Expand All @@ -89,14 +89,16 @@ def filter_out_automorphisms(ms: list[nx.Graph]) -> list[nx.Graph]:
return list(filtered_ms)


def get_refinement_tree_levels(m: nx.Graph) -> Generator[list[nx.Graph], None, None]:
def get_refinement_tree_levels(
m: nx.Graph, filter_automorphisms: bool = True
) -> Generator[list[nx.Graph], None, None]:
"""
Build BFS refinement-tree and yield each level.
"""
parents = [m]

while True:
yield filter_out_automorphisms(parents)
yield parents
if all(map(partitioning_is_discrete, parents)):
return

Expand All @@ -105,7 +107,9 @@ def get_refinement_tree_levels(m: nx.Graph) -> Generator[list[nx.Graph], None, N
for parent in parents
for child in get_refinement_tree_node_children(parent)
]
parents = children
parents = (
filter_out_automorphisms(children) if filter_automorphisms else children
)


def get_canonical_molecule(ms: list[nx.Graph]) -> nx.Graph:
Expand Down

0 comments on commit fa2c826

Please sign in to comment.