diff --git a/content/algorithms/assortativity/correlation.md b/content/algorithms/assortativity/correlation.md index c5aec79d..fa7a3867 100644 --- a/content/algorithms/assortativity/correlation.md +++ b/content/algorithms/assortativity/correlation.md @@ -5,7 +5,7 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.13.8 + jupytext_version: 1.14.2 kernelspec: display_name: Python 3 language: python diff --git a/content/algorithms/dag/index.md b/content/algorithms/dag/index.md index 6022e17f..f02281d2 100644 --- a/content/algorithms/dag/index.md +++ b/content/algorithms/dag/index.md @@ -5,7 +5,7 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.13.8 + jupytext_version: 1.14.2 kernelspec: display_name: Python 3 language: python diff --git a/content/algorithms/euler/euler.md b/content/algorithms/euler/euler.md index f8ff2ac9..eb1777e7 100644 --- a/content/algorithms/euler/euler.md +++ b/content/algorithms/euler/euler.md @@ -5,7 +5,7 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.13.6 + jupytext_version: 1.14.2 kernelspec: display_name: Python 3 (ipykernel) language: python diff --git a/content/algorithms/flow/dinitz_alg.md b/content/algorithms/flow/dinitz_alg.md index 9df18d89..4ae67fc0 100644 --- a/content/algorithms/flow/dinitz_alg.md +++ b/content/algorithms/flow/dinitz_alg.md @@ -5,7 +5,7 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.13.8 + jupytext_version: 1.14.2 kernelspec: display_name: Python 3 (ipykernel) language: python diff --git a/content/algorithms/index.md b/content/algorithms/index.md index cd233db9..c941e4a3 100644 --- a/content/algorithms/index.md +++ b/content/algorithms/index.md @@ -1,5 +1,10 @@ + ++++ + ++++ + # Algorithms A closer look at some of the algorithms and network analysis techniques @@ -13,4 +18,5 @@ assortativity/correlation dag/index flow/dinitz_alg euler/euler 
+sub_isomorphism/subgraph_iso_img ``` diff --git a/content/algorithms/sub_isomorphism/data/1CRN_edgelist.txt b/content/algorithms/sub_isomorphism/data/1CRN_edgelist.txt new file mode 100644 index 00000000..2268ab10 --- /dev/null +++ b/content/algorithms/sub_isomorphism/data/1CRN_edgelist.txt @@ -0,0 +1,175 @@ +A1 A2 +A1 A3 +A1 A34 +A1 A35 +A1 A36 +A1 A37 +A1 A38 +A2 A3 +A2 A4 +A2 A32 +A2 A33 +A2 A34 +A2 A35 +A3 A4 +A3 A5 +A3 A32 +A3 A33 +A3 A34 +A3 A40 +A3 A44 +A3 A45 +A3 A46 +A4 A5 +A4 A6 +A4 A9 +A4 A10 +A4 A31 +A4 A32 +A4 A33 +A4 A44 +A4 A45 +A4 A46 +A5 A6 +A5 A9 +A5 A31 +A5 A32 +A5 A43 +A5 A44 +A5 A45 +A5 A46 +A6 A7 +A6 A8 +A6 A9 +A6 A10 +A6 A46 +A7 A8 +A7 A9 +A7 A10 +A7 A11 +A7 A46 +A8 A9 +A8 A10 +A8 A11 +A8 A12 +A9 A10 +A9 A11 +A9 A12 +A9 A13 +A9 A30 +A9 A31 +A9 A32 +A10 A11 +A10 A12 +A10 A13 +A10 A14 +A10 A46 +A11 A12 +A11 A13 +A11 A14 +A11 A15 +A12 A13 +A12 A14 +A12 A15 +A12 A16 +A12 A30 +A13 A14 +A13 A15 +A13 A16 +A13 A17 +A13 A26 +A13 A30 +A14 A15 +A14 A16 +A14 A17 +A14 A18 +A15 A16 +A15 A17 +A15 A18 +A16 A17 +A16 A18 +A16 A19 +A16 A21 +A16 A26 +A17 A18 +A17 A19 +A17 A20 +A17 A21 +A17 A26 +A18 A19 +A18 A20 +A19 A20 +A19 A21 +A20 A21 +A20 A22 +A21 A22 +A21 A23 +A21 A25 +A21 A26 +A22 A23 +A22 A24 +A22 A25 +A22 A26 +A23 A24 +A23 A25 +A23 A26 +A23 A27 +A24 A25 +A24 A26 +A24 A27 +A24 A28 +A25 A26 +A25 A27 +A25 A28 +A25 A29 +A26 A27 +A26 A28 +A26 A29 +A26 A30 +A27 A28 +A27 A29 +A27 A30 +A27 A31 +A27 A32 +A27 A33 +A28 A29 +A28 A30 +A28 A31 +A28 A32 +A29 A30 +A29 A31 +A30 A31 +A30 A32 +A31 A32 +A31 A33 +A32 A33 +A33 A34 +A33 A35 +A34 A35 +A34 A36 +A35 A36 +A35 A37 +A36 A37 +A36 A38 +A37 A38 +A37 A39 +A38 A39 +A38 A40 +A39 A40 +A39 A41 +A40 A41 +A40 A42 +A40 A44 +A40 A45 +A41 A42 +A41 A43 +A41 A44 +A41 A45 +A42 A43 +A42 A44 +A42 A45 +A43 A44 +A43 A45 +A44 A45 +A44 A46 +A45 A46 diff --git a/content/algorithms/sub_isomorphism/data/1EJG_edgelist.txt b/content/algorithms/sub_isomorphism/data/1EJG_edgelist.txt new file mode 100644 index 00000000..2268ab10 --- /dev/null 
+++ b/content/algorithms/sub_isomorphism/data/1EJG_edgelist.txt @@ -0,0 +1,175 @@ +A1 A2 +A1 A3 +A1 A34 +A1 A35 +A1 A36 +A1 A37 +A1 A38 +A2 A3 +A2 A4 +A2 A32 +A2 A33 +A2 A34 +A2 A35 +A3 A4 +A3 A5 +A3 A32 +A3 A33 +A3 A34 +A3 A40 +A3 A44 +A3 A45 +A3 A46 +A4 A5 +A4 A6 +A4 A9 +A4 A10 +A4 A31 +A4 A32 +A4 A33 +A4 A44 +A4 A45 +A4 A46 +A5 A6 +A5 A9 +A5 A31 +A5 A32 +A5 A43 +A5 A44 +A5 A45 +A5 A46 +A6 A7 +A6 A8 +A6 A9 +A6 A10 +A6 A46 +A7 A8 +A7 A9 +A7 A10 +A7 A11 +A7 A46 +A8 A9 +A8 A10 +A8 A11 +A8 A12 +A9 A10 +A9 A11 +A9 A12 +A9 A13 +A9 A30 +A9 A31 +A9 A32 +A10 A11 +A10 A12 +A10 A13 +A10 A14 +A10 A46 +A11 A12 +A11 A13 +A11 A14 +A11 A15 +A12 A13 +A12 A14 +A12 A15 +A12 A16 +A12 A30 +A13 A14 +A13 A15 +A13 A16 +A13 A17 +A13 A26 +A13 A30 +A14 A15 +A14 A16 +A14 A17 +A14 A18 +A15 A16 +A15 A17 +A15 A18 +A16 A17 +A16 A18 +A16 A19 +A16 A21 +A16 A26 +A17 A18 +A17 A19 +A17 A20 +A17 A21 +A17 A26 +A18 A19 +A18 A20 +A19 A20 +A19 A21 +A20 A21 +A20 A22 +A21 A22 +A21 A23 +A21 A25 +A21 A26 +A22 A23 +A22 A24 +A22 A25 +A22 A26 +A23 A24 +A23 A25 +A23 A26 +A23 A27 +A24 A25 +A24 A26 +A24 A27 +A24 A28 +A25 A26 +A25 A27 +A25 A28 +A25 A29 +A26 A27 +A26 A28 +A26 A29 +A26 A30 +A27 A28 +A27 A29 +A27 A30 +A27 A31 +A27 A32 +A27 A33 +A28 A29 +A28 A30 +A28 A31 +A28 A32 +A29 A30 +A29 A31 +A30 A31 +A30 A32 +A31 A32 +A31 A33 +A32 A33 +A33 A34 +A33 A35 +A34 A35 +A34 A36 +A35 A36 +A35 A37 +A36 A37 +A36 A38 +A37 A38 +A37 A39 +A38 A39 +A38 A40 +A39 A40 +A39 A41 +A40 A41 +A40 A42 +A40 A44 +A40 A45 +A41 A42 +A41 A43 +A41 A44 +A41 A45 +A42 A43 +A42 A44 +A42 A45 +A43 A44 +A43 A45 +A44 A45 +A44 A46 +A45 A46 diff --git a/content/algorithms/sub_isomorphism/data/1FN3_edgelist.txt b/content/algorithms/sub_isomorphism/data/1FN3_edgelist.txt new file mode 100644 index 00000000..818076d7 --- /dev/null +++ b/content/algorithms/sub_isomorphism/data/1FN3_edgelist.txt @@ -0,0 +1,548 @@ +A1 A2 +A1 A3 +A2 A3 +A2 A4 +A2 A6 +A3 A4 +A3 A5 +A3 A6 +A3 A7 +A4 A5 +A4 A6 +A4 A7 +A4 A8 +A5 A6 +A5 A7 +A5 A8 +A5 A9 +A6 A7 +A6 A8 +A6 A9 +A6 A10 
+A6 A124 +A7 A8 +A7 A9 +A7 A10 +A7 A11 +A8 A9 +A8 A10 +A8 A11 +A8 A12 +A9 A10 +A9 A11 +A9 A12 +A9 A13 +A9 A121 +A9 A124 +A10 A11 +A10 A12 +A10 A13 +A10 A14 +A10 A15 +A10 A121 +A10 A124 +A10 A125 +A11 A12 +A11 A13 +A11 A14 +A11 A15 +A11 A70 +A12 A13 +A12 A14 +A12 A15 +A12 A16 +A13 A14 +A13 A15 +A13 A16 +A13 A116 +A14 A15 +A14 A16 +A14 A17 +A15 A16 +A15 A17 +A15 A18 +A16 A17 +A16 A18 +A17 A18 +A17 A19 +A17 A20 +A17 A21 +A18 A19 +A18 A20 +A18 A21 +A19 A20 +A19 A21 +A19 A22 +A20 A21 +A20 A22 +A20 A23 +A20 A24 +A21 A22 +A21 A23 +A21 A24 +A21 A25 +A21 A60 +A21 A63 +A21 A64 +A22 A23 +A22 A24 +A22 A25 +A22 A26 +A22 A59 +A22 A60 +A23 A24 +A23 A25 +A23 A26 +A23 A27 +A23 A59 +A24 A25 +A24 A26 +A24 A27 +A24 A28 +A24 A108 +A25 A26 +A25 A27 +A25 A28 +A25 A29 +A25 A59 +A25 A60 +A25 A62 +A25 A63 +A26 A27 +A26 A28 +A26 A29 +A26 A55 +A26 A56 +A26 A57 +A26 A58 +A26 A59 +A26 A60 +A27 A28 +A27 A29 +A27 A30 +A27 A31 +A28 A29 +A28 A30 +A28 A31 +A28 A32 +A28 A104 +A28 A105 +A28 A108 +A29 A30 +A29 A31 +A29 A32 +A29 A33 +A29 A55 +A30 A31 +A30 A32 +A30 A33 +A30 A34 +A31 A32 +A31 A33 +A31 A34 +A31 A35 +A31 A104 +A32 A33 +A32 A34 +A32 A35 +A32 A36 +A32 A39 +A32 A100 +A32 A101 +A32 A104 +A33 A34 +A33 A35 +A33 A36 +A33 A37 +A33 A39 +A33 A40 +A33 A48 +A34 A35 +A34 A36 +A34 A37 +A35 A36 +A35 A37 +A36 A37 +A36 A38 +A36 A39 +A37 A38 +A37 A39 +A37 A40 +A38 A39 +A38 A40 +A38 A41 +A39 A40 +A39 A41 +A39 A42 +A40 A41 +A40 A42 +A40 A43 +A40 A44 +A41 A42 +A41 A43 +A41 A44 +A42 A43 +A42 A44 +A43 A44 +A43 A45 +A43 A46 +A44 A45 +A44 A46 +A45 A46 +A45 A47 +A46 A47 +A46 A48 +A46 A54 +A47 A48 +A47 A49 +A47 A52 +A47 A54 +A48 A49 +A48 A50 +A49 A50 +A49 A51 +A49 A52 +A50 A51 +A50 A52 +A51 A52 +A51 A53 +A51 A56 +A52 A53 +A52 A54 +A52 A55 +A52 A56 +A53 A54 +A53 A55 +A53 A56 +A53 A57 +A54 A55 +A54 A56 +A54 A57 +A54 A58 +A55 A56 +A55 A57 +A55 A58 +A55 A59 +A56 A57 +A56 A58 +A56 A59 +A57 A58 +A57 A59 +A57 A60 +A58 A59 +A58 A60 +A58 A61 +A58 A62 +A59 A60 +A59 A61 +A59 A62 +A59 A63 +A60 A61 +A60 A62 +A60 A63 +A60 A64 +A61 
A62 +A61 A63 +A61 A64 +A61 A65 +A62 A63 +A62 A64 +A62 A65 +A62 A66 +A63 A64 +A63 A65 +A63 A66 +A63 A67 +A64 A65 +A64 A66 +A64 A67 +A65 A66 +A65 A67 +A65 A68 +A65 A69 +A66 A67 +A66 A68 +A66 A69 +A66 A70 +A67 A68 +A67 A69 +A67 A70 +A67 A71 +A68 A69 +A68 A70 +A68 A71 +A68 A72 +A69 A70 +A69 A71 +A69 A72 +A69 A73 +A69 A76 +A69 A79 +A70 A71 +A70 A72 +A70 A73 +A71 A72 +A71 A73 +A72 A73 +A72 A74 +A72 A75 +A72 A76 +A72 A79 +A73 A74 +A73 A75 +A73 A76 +A74 A75 +A74 A76 +A75 A76 +A75 A77 +A75 A78 +A75 A79 +A76 A77 +A76 A78 +A76 A79 +A76 A80 +A77 A78 +A77 A79 +A77 A80 +A77 A81 +A77 A135 +A78 A79 +A78 A80 +A78 A81 +A79 A80 +A79 A81 +A80 A81 +A80 A82 +A80 A83 +A80 A84 +A81 A82 +A81 A83 +A81 A84 +A81 A85 +A82 A83 +A82 A84 +A82 A85 +A82 A86 +A83 A84 +A83 A85 +A83 A86 +A83 A87 +A83 A136 +A84 A85 +A84 A86 +A84 A87 +A84 A135 +A84 A136 +A84 A137 +A84 A138 +A84 A139 +A85 A86 +A85 A87 +A85 A88 +A85 A89 +A85 A136 +A85 A139 +A86 A87 +A86 A88 +A86 A89 +A86 A90 +A86 A91 +A87 A88 +A87 A89 +A87 A90 +A87 A91 +A87 A92 +A87 A93 +A88 A89 +A88 A90 +A88 A91 +A88 A92 +A88 A139 +A88 A140 +A89 A90 +A89 A91 +A89 A92 +A90 A91 +A90 A92 +A91 A92 +A91 A93 +A92 A93 +A92 A94 +A93 A94 +A93 A95 +A93 A97 +A93 A98 +A94 A95 +A94 A96 +A94 A97 +A94 A98 +A95 A96 +A95 A97 +A95 A98 +A95 A137 +A96 A97 +A96 A98 +A96 A99 +A96 A100 +A97 A98 +A97 A99 +A97 A100 +A97 A101 +A98 A99 +A98 A100 +A98 A101 +A98 A102 +A98 A133 +A99 A100 +A99 A101 +A99 A102 +A99 A103 +A100 A101 +A100 A102 +A100 A103 +A100 A104 +A101 A102 +A101 A103 +A101 A104 +A101 A105 +A102 A103 +A102 A104 +A102 A105 +A102 A129 +A102 A130 +A102 A133 +A103 A104 +A103 A105 +A103 A106 +A104 A105 +A104 A106 +A104 A107 +A104 A108 +A105 A106 +A105 A107 +A105 A108 +A105 A109 +A106 A107 +A106 A108 +A106 A109 +A106 A110 +A107 A108 +A107 A109 +A107 A110 +A107 A111 +A108 A109 +A108 A110 +A108 A111 +A108 A112 +A109 A110 +A109 A111 +A109 A112 +A109 A113 +A109 A117 +A110 A111 +A110 A112 +A110 A113 +A110 A114 +A110 A117 +A111 A112 +A111 A113 +A111 A114 +A112 A113 +A112 A114 +A113 
A114 +A113 A115 +A113 A116 +A113 A117 +A114 A115 +A114 A116 +A114 A117 +A115 A116 +A115 A117 +A115 A118 +A116 A117 +A116 A118 +A117 A118 +A117 A119 +A117 A121 +A117 A122 +A118 A119 +A118 A120 +A118 A121 +A118 A122 +A119 A120 +A119 A121 +A119 A122 +A119 A123 +A120 A121 +A120 A122 +A120 A123 +A120 A124 +A121 A122 +A121 A123 +A121 A124 +A121 A125 +A122 A123 +A122 A124 +A122 A125 +A122 A126 +A123 A124 +A123 A125 +A123 A126 +A123 A127 +A124 A125 +A124 A126 +A124 A127 +A124 A128 +A125 A126 +A125 A127 +A125 A128 +A125 A129 +A126 A127 +A126 A128 +A126 A129 +A126 A130 +A127 A128 +A127 A129 +A127 A130 +A127 A131 +A128 A129 +A128 A130 +A128 A131 +A128 A132 +A129 A130 +A129 A131 +A129 A132 +A129 A133 +A130 A131 +A130 A132 +A130 A133 +A130 A134 +A131 A132 +A131 A133 +A131 A134 +A131 A135 +A132 A133 +A132 A134 +A132 A135 +A132 A136 +A133 A134 +A133 A135 +A133 A136 +A134 A135 +A134 A136 +A134 A137 +A134 A138 +A135 A136 +A135 A137 +A135 A138 +A136 A137 +A136 A138 +A136 A139 +A137 A138 +A137 A139 +A137 A140 +A138 A139 +A138 A140 +A139 A140 +A139 A141 +A140 A141 diff --git a/content/algorithms/sub_isomorphism/img/1FN3.png b/content/algorithms/sub_isomorphism/img/1FN3.png new file mode 100644 index 00000000..72c7aff3 Binary files /dev/null and b/content/algorithms/sub_isomorphism/img/1FN3.png differ diff --git a/content/algorithms/sub_isomorphism/img/1crn.png b/content/algorithms/sub_isomorphism/img/1crn.png new file mode 100644 index 00000000..01993ca1 Binary files /dev/null and b/content/algorithms/sub_isomorphism/img/1crn.png differ diff --git a/content/algorithms/sub_isomorphism/img/1fn3.png b/content/algorithms/sub_isomorphism/img/1fn3.png new file mode 100644 index 00000000..2666839b Binary files /dev/null and b/content/algorithms/sub_isomorphism/img/1fn3.png differ diff --git a/content/algorithms/sub_isomorphism/img/2_structure.png b/content/algorithms/sub_isomorphism/img/2_structure.png new file mode 100644 index 00000000..665e882c Binary files /dev/null and 
b/content/algorithms/sub_isomorphism/img/2_structure.png differ diff --git a/content/algorithms/sub_isomorphism/img/nocopy.jpg b/content/algorithms/sub_isomorphism/img/nocopy.jpg new file mode 100644 index 00000000..d945bdc8 Binary files /dev/null and b/content/algorithms/sub_isomorphism/img/nocopy.jpg differ diff --git a/content/algorithms/sub_isomorphism/img/nocopy.png b/content/algorithms/sub_isomorphism/img/nocopy.png new file mode 100644 index 00000000..f9d9d49d Binary files /dev/null and b/content/algorithms/sub_isomorphism/img/nocopy.png differ diff --git a/content/algorithms/sub_isomorphism/img/nocopy_150x100.png b/content/algorithms/sub_isomorphism/img/nocopy_150x100.png new file mode 100644 index 00000000..17aa29dd Binary files /dev/null and b/content/algorithms/sub_isomorphism/img/nocopy_150x100.png differ diff --git a/content/algorithms/sub_isomorphism/img/nocopy_150x100_p.png b/content/algorithms/sub_isomorphism/img/nocopy_150x100_p.png new file mode 100644 index 00000000..85fa61b3 Binary files /dev/null and b/content/algorithms/sub_isomorphism/img/nocopy_150x100_p.png differ diff --git a/content/algorithms/sub_isomorphism/img/phone.png b/content/algorithms/sub_isomorphism/img/phone.png new file mode 100644 index 00000000..ba561ed8 Binary files /dev/null and b/content/algorithms/sub_isomorphism/img/phone.png differ diff --git a/content/algorithms/sub_isomorphism/img/phone_color.png b/content/algorithms/sub_isomorphism/img/phone_color.png new file mode 100644 index 00000000..2d54b88d Binary files /dev/null and b/content/algorithms/sub_isomorphism/img/phone_color.png differ diff --git a/content/algorithms/sub_isomorphism/subgraph_iso_img.md b/content/algorithms/sub_isomorphism/subgraph_iso_img.md new file mode 100644 index 00000000..b1b0dad2 --- /dev/null +++ b/content/algorithms/sub_isomorphism/subgraph_iso_img.md @@ -0,0 +1,363 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 
1.14.2 +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Subgraph Isomorphism: Real world applications + ++++ + +First let's define the Subgraph Isomorphism problem: + +Let $H$ and $G$ be graphs. $H$ is isomorphic to a subgraph of $G = (V, E)$ if + +$\exists \hspace{0.1cm}G_o = (V_o, E_o)$ such that $V_o \subseteq V \wedge E_o \subseteq E \cap (V_o \times V_o)$, and $G_o$ and $H$ are isomorphic + ++++ + +Don't be scared of the math formula; we can break it down to fully understand the problem. + +The main idea of this definition is that $H$ is subgraph isomorphic to $G$ if we can find in $G$ a graph $G_o$ that is isomorphic to $H$. For $G_o$ to be in $G$ we need the nodes and edges in $G_o$ to be present in $G$. Formally, we need $V_o$ (the nodes of $G_o$) to be a subset of $V$ (the nodes of $G$). Similarly, we need $E_o$ (the edges of $G_o$) to be a subset of the edges of $G$, and the edges in $E_o$ must only involve nodes that are present in $V_o$. + +This can be better understood with an example. Let's build a particular case of $G$, $G_o$, and $H$.
+ +```{code-cell} +import matplotlib.pyplot as plt +import numpy as np +import networkx as nx + +# Create a lollipop graph +G = nx.lollipop_graph(5, 2) + +# Create a graph that is a subgraph of G +G_o = nx.complete_graph(5) + +# Create a graph H that is isomorphic with G_o +H = nx.complete_graph(["a", "b", "c", "d", "e"]) + +plt.subplot(131) +plt.title("G", fontweight="bold") +# Define colors for the nodes of G: the nodes in G_o are cyan and the others are yellow +colors = ["c" for i in range(0, 5)] + ["y" for i in range(5, 7)] +nx.draw_spring(G, with_labels=True, node_color=colors) # Plot G + +plt.subplot(132) + +plt.title("Subgraph of G($G_o$)", fontweight="bold") +nx.draw_circular(G_o, with_labels=True, node_color="c") # Plot G_o + +plt.subplot(133) +plt.title("H", fontweight="bold") +nx.draw_circular(H, with_labels=True, node_color="m") # Plot H +``` + +Many real-world structures can be represented using graphs. Some of them are involved in problems that can be solved using subgraph isomorphism algorithms. In this notebook we will study two real-world applications: +- Pattern recognition in images +- Protein classification by structure + ++++ + +## Where's my phone? + ++++ + +Let's use subgraph isomorphism to find our phone on our messy desk. First, these are the images of our desk and phone: + +```{code-cell} +plt.subplot(121) +m = plt.imread("img/nocopy_150x100.png") +plt.imshow(m) +plt.title("Messy desk") + +plt.subplot(122) +c = plt.imread("img/phone_color.png") +plt.imshow(c) +plt.title("Indestructible phone") +plt.show() +``` + +Image Source: + ++++ + +To simplify this problem, let's convert these images into grayscale. After this, each image will be a 2D array where each position represents a pixel. +Also, let's define 0 (black pixel) as a null pixel. So we will have to delete all null pixels from our desk image. This will let us easily delete the background in the phone image.
+ +```{code-cell} +def rgb2gray(rgb): + return np.dot(rgb[..., :3], [0.2989, 0.5870, 0.1140]) +``` + +```{code-cell} +# Preprocessing: Delete black pixels +I = rgb2gray(plt.imread("img/nocopy_150x100.png")) +for i in range(0, I.shape[0]): + for j in range(0, I.shape[1]): + if I[i][j] == 0: + I[i][j] = 0.0001 +plt.imsave("img/nocopy_150x100_p.png", I) +``` + +These will be the final images: + +```{code-cell} +plt.subplot(121) + +I = rgb2gray(plt.imread("img/nocopy_150x100_p.png")) # Desk +plt.imshow(I, cmap="gray") +plt.title("Messy desk") + +plt.subplot(122) + +I2 = rgb2gray(plt.imread("img/phone.png")) # Object to find +plt.imshow(I2, cmap="gray") +plt.title("Indestructible phone") +plt.show() +``` + +### Using Graphs to represent images + ++++ + +For this, we can use *grid_2d_graph(n, m)*. This function creates a $nxm$ grid where each coordinate is a node and each node is connected to its four nearest neighbors. + +For the phone graph, we will delete all the nodes corresponding to null pixels as they represent the background of the image. + +```{code-cell} +desk_graph = nx.grid_2d_graph(I.shape[0], I.shape[1]) +phone_graph = nx.grid_2d_graph(I2.shape[0], I2.shape[1]) + +# Define the node attribute weight as pixel values in desk_graph +for i in range(0, I.shape[0]): + for j in range(0, I.shape[1]): + desk_graph.nodes[(i, j)]["weight"] = I[i][j] + +# Define the node attribute "weight" as pixel values in phone_graph +# and delete useless nodes +for i in range(0, I2.shape[0]): + for j in range(0, I2.shape[1]): + if I2[i][j] != 0: + phone_graph.nodes[(i, j)]["weight"] = I[i][j] + else: + phone_graph.remove_node((i, j)) +``` + +Let's see how the phone_graph looks like coloring the nodes. 
+ +```{code-cell} +import matplotlib.cm as cm +from matplotlib.colors import Normalize, rgb2hex + +# Match values to colors in RGB +def color_map_color(value, cmap_name="gray", vmin=0, vmax=1): + norm = Normalize(vmin=vmin, vmax=vmax) + cmap = cm.get_cmap(cmap_name) + rgb = cmap(norm(abs(value)))[:3] + color = rgb2hex(rgb) + return color + + +# Get RGB colors of the phone photo +I2_colors = [] +for i in range(0, I2.shape[0]): + for j in range(0, I2.shape[1]): + if I2[i][j] != 0: # ignore null pixels + I2_colors.append(color_map_color(I2[i][j])) + +nx.draw_kamada_kawai(phone_graph, node_color=I2_colors) +``` + +Finally, let's see if we can match our phone graph in the desk graph. For this, we'll use the function *subgraph_is_isomorphic()* from the isomorphism module. To consider the node attribute "weight" in the matching we need to use the function *numerical_node_match(attribute, tol)*. + +```{code-cell} +import networkx.algorithms.isomorphism as iso + +# Match node weight attributes with tolerance 10^-10 +em = iso.numerical_node_match("weight", 10 ^ -10) + +g = iso.GraphMatcher(desk_graph, phone_graph, node_match=em) + +g.subgraph_is_isomorphic() +``` + +We could successfully match the phone graph with the desk graph. This means that the phone graph is a subgraph isomorphism of the desk graph. More formally, this means that there's a possible matching function between the nodes of the phone graph and the nodes of the desk graph. We could use the matching to locate the nodes corresponding to the phone in the desk graph then the node labels are the pixel coordinates in the original picture. + ++++ + +Also, we can check that the desk and phone graphs are not isomorphic. + +```{code-cell} +nx.is_isomorphic(desk_graph, phone_graph, node_match=em) +``` + +## Proteins Secondary structure + ++++ + +Proteins are large biomolecules and macromolecules that comprise one or more long chains of amino acid residues. 
Proteins are key in many biological processes within organisms. + +Proteins differ from one another primarily in their sequence of amino acids, which is dictated by the nucleotide sequence of their genes, and which usually results in protein folding into a specific 3D structure that determines its activity. + +Biochemists often refer to four distinct aspects of a protein's structure. We will be interested in the secondary and tertiary structure. + +**Secondary Structure** : regularly repeating local structures stabilized by hydrogen bonds. The most common examples are the α-helix, β-sheet, and turns. Because secondary structures are local, many regions of different secondary structures can be present in the same protein molecule. + +```{code-cell} +plt.imshow(plt.imread("img/2_structure.png")) +plt.title("Secondary Structure", fontweight="bold") +plt.axis("off") +plt.rcParams["figure.figsize"] = (2, 2) +plt.show() +``` + +Image Source: + ++++ + +**Tertiary Structure** : the 3D shape of a protein. This structure carries information about the interactions between the R groups of the amino acids that make up the protein. Let's see what the tertiary structure of some proteins looks like: + +```{code-cell} +plt.rcParams["figure.figsize"] = [7, 7] + +plt.subplot(121) +plt.imshow(plt.imread("img/1fn3.png")) +plt.title("1FN3 Tertiary Structure", fontweight="bold") +plt.axis("off") + +plt.subplot(122) +plt.imshow(plt.imread("img/1crn.png")) +plt.title("1CRN Tertiary Structure", fontweight="bold") +plt.axis("off") +plt.show() +``` + +Image Source: + ++++ + +Many graph representations can be built from the Secondary and Tertiary Structure. Each representation captures different information about the protein. For example, +- Use α-helix and β-sheet as nodes and the interaction energy between them to define edges.
+ +- $C_\alpha$ Networks: the $C_\alpha$ atom of each amino acid residue is considered a node and an edge is drawn if the $C_\alpha$ distance between a pair of residues is within a threshold distance. Every amino acid residue has exactly one $C_\alpha$ atom (the central carbon of its backbone), so each residue maps to one node. It is one of the simplest and most widely analyzed protein contact networks, and it captures the 3D topology of the protein structure very well. + ++++ + +We will work on $C_\alpha$ networks. We will read the networks from edgelists. + +Data Source: + +Let's see graphs of proteins with PDB codes 1CRN (plant protein), 1FN3 (oxygen storage/transport in the human body) and 1EJG (plant protein). + +```{code-cell} +p_1CRN = nx.read_edgelist("data/1CRN_edgelist.txt", nodetype=str) +nx.draw_spring(p_1CRN, node_color="g") +plt.title("1CRN Plant Protein", fontweight="bold") +plt.show() +``` + +```{code-cell} +p_1FN3 = nx.read_edgelist("data/1FN3_edgelist.txt", nodetype=str) +nx.draw_spring(p_1FN3, node_color="r") +plt.title("1FN3 oxygen storage/transport Protein", fontweight="bold") +plt.show() +``` + +```{code-cell} +p_1EJG = nx.read_edgelist("data/1EJG_edgelist.txt", nodetype=str) +nx.draw_spring(p_1EJG, node_color="g") +plt.title("1EJG Plant Protein", fontweight="bold") +plt.show() +``` + +We can identify graphlets (induced subgraphs) that are present in these graphs and use them to classify proteins. We can extract some subgraphs from the proteins and test if they are present in other proteins. Let's find a subgraph of 1CRN that is also a subgraph of 1EJG but not of 1FN3. This is interesting because 1CRN and 1EJG are both plant proteins but 1FN3 is not. Of course, to decide whether this graphlet is specific to plant proteins we should test it on more proteins.
+ +```{code-cell} +# Get a induced subgraph from 1CRN +graphlet = p_1CRN.subgraph(["A" + str(i) for i in range(0, 30)]) +nx.draw(graphlet) + +# Test if the graphlet is a subgraph of each protein +g1 = iso.GraphMatcher(p_1CRN, graphlet) +print("Graphlet present in 1CRN ", g1.subgraph_is_isomorphic()) + +g2 = iso.GraphMatcher(p_1FN3, graphlet) +print("Graphlet present in 1FN3", g2.subgraph_is_isomorphic()) + +g3 = iso.GraphMatcher(p_1EJG, graphlet) +print("Graphlet present in 1EJG", g3.subgraph_is_isomorphic()) +``` + +On a similar way we can find a graphlet in 1FN3 that is not present in 1CRN and 1EJG. + +```{code-cell} +# Get a induced subgraph from 1FN3 +graphlet = p_1FN3.subgraph(["A" + str(i) for i in range(100, 110, 1)]) +plt.rcParams["figure.figsize"] = (5, 5) +nx.draw(graphlet) + +# Test if the graphlet is a subgraph of each protein +g1 = iso.GraphMatcher(p_1CRN, graphlet) +print("Graphlet present in 1CRN ", g1.subgraph_is_isomorphic()) + +g2 = iso.GraphMatcher(p_1FN3, graphlet) +print("Graphlet present in 1FN3", g2.subgraph_is_isomorphic()) + +g3 = iso.GraphMatcher(p_1EJG, graphlet) +print("Graphlet present in 1EJG", g3.subgraph_is_isomorphic()) +``` + +Another option is to use randomly generated graphs given a certain number of nodes, for example, using Erdos-graphs generators. *erdos_renyi_graph(n, p, seed)* generates a graph of n nodes in which all possible edges are added with probability p. We can find a random graphs that is present in 1FN3 but not in 1CRN and 1EJG. 
+ +```{code-cell} +random_graph = nx.erdos_renyi_graph(7, 0.9, seed=8) +nx.draw(graphlet) + +# Test if the random graph is a subgraph of each protein +g1 = iso.GraphMatcher(p_1CRN, random_graph) +print("Random graph present in 1CRN ", g1.subgraph_is_isomorphic()) + +g2 = iso.GraphMatcher(p_1FN3, random_graph) +print("Random graph present in 1FN3", g2.subgraph_is_isomorphic()) + +g3 = iso.GraphMatcher(p_1EJG, random_graph) +print("Random graph present in 1EJG", g3.subgraph_is_isomorphic()) +``` + +Also there are some graphlets and random graphs that are present in all proteins, for example: + +```{code-cell} +random_graph = nx.erdos_renyi_graph(5, 0.9, seed=8) +nx.draw(graphlet) + +# Test if the random graph is a subgraph of each protein +g1 = iso.GraphMatcher(p_1CRN, random_graph) +print("Random graph present in 1CRN ", g1.subgraph_is_isomorphic()) + +g2 = iso.GraphMatcher(p_1FN3, random_graph) +print("Random graph present in 1FN3", g2.subgraph_is_isomorphic()) + +g3 = iso.GraphMatcher(p_1EJG, random_graph) +print("Random graph present in 1EJG", g3.subgraph_is_isomorphic()) +``` + +This technique can have many applications, for example, building tree-classification models. But in order to find graphlets and random graphs that are useful to clasify proteins it's important to test multiple proteins and also identify if those graphs are more present in some proteins than in random graphs. + ++++ + +### References +- +- +- +- Chakrabarty B. and Parekh N., "NAPS: Network Analysis of Protein Structures", Nucleic Acids Research, 2016, Vol. 44, Web Server issue W375–W382 + +- Vishveshwara S.,Brinda K. V. and Kannan N., "Protein Structure: Insights from graph theory", Journal of Theoretical and Computational Chemistry, Vol. 1, No. 1 (2002) 000–000. 
+- Henneges C., Röttig M., Kohlbacher O., Zell A., "Graphlet data mining of energetical interaction patterns in protein 3D structures", ICFC-ICNC 2010 - Proceedings of the International Conference on Fuzzy Computation and International Conference on Neural Computation, Valencia, Spain, October 24-26, 2010 + diff --git a/content/exploratory_notebooks/facebook_notebook.md b/content/exploratory_notebooks/facebook_notebook.md index 4e613b33..66721314 100644 --- a/content/exploratory_notebooks/facebook_notebook.md +++ b/content/exploratory_notebooks/facebook_notebook.md @@ -5,7 +5,7 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.13.8 + jupytext_version: 1.14.2 kernelspec: display_name: Python 3 language: python diff --git a/content/generators/geometric.md b/content/generators/geometric.md index 681f0b5a..8055c2a7 100644 --- a/content/generators/geometric.md +++ b/content/generators/geometric.md @@ -5,7 +5,7 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.13.8 + jupytext_version: 1.14.2 kernelspec: display_name: Python 3 language: python diff --git a/content/generators/index.md b/content/generators/index.md index ad4d3028..94d9991d 100644 --- a/content/generators/index.md +++ b/content/generators/index.md @@ -1,5 +1,10 @@ + ++++ + ++++ + # Graph Generators A closer look at the functions provided by NetworkX to create interesting diff --git a/content/generators/sudoku.md b/content/generators/sudoku.md index a4eef879..61f355e4 100644 --- a/content/generators/sudoku.md +++ b/content/generators/sudoku.md @@ -5,7 +5,7 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.13.8 + jupytext_version: 1.14.2 kernelspec: display_name: Python 3 (ipykernel) language: python