diff --git a/build/lib/hotspots/__init__.py b/build/lib/hotspots/__init__.py index c8c5a079..3f15e846 100644 --- a/build/lib/hotspots/__init__.py +++ b/build/lib/hotspots/__init__.py @@ -5,7 +5,7 @@ __copyright__ = None __credits__ = None __license__ = None -__version__ = "1.0.0" +__version__ = "1.0.2" __maintainer__ = "Peter Curran" __email__ = "pcurran@ccdc.cam.ac.uk" __status__ = "Development" diff --git a/build/lib/hotspots/grid_extension.py b/build/lib/hotspots/grid_extension.py index 45df5909..dd6a4063 100644 --- a/build/lib/hotspots/grid_extension.py +++ b/build/lib/hotspots/grid_extension.py @@ -38,6 +38,15 @@ class Grid(utilities.Grid): """ A class to extend a `ccdc.utilities.Grid` this provides grid methods required in the Fragment Hotspot Maps algorithm """ + def coordinates(self, threshold=1): + nx, ny, nz = self.nsteps + return [self.indices_to_point(i, j, k) + for i in range(nx) + for j in range(ny) + for k in range(nz) + if self.value(i, j, k) >= threshold] + + def grid_value_by_coordinates(self, threshold=1): """ returns a dictionary of grid point values by coordinates @@ -657,6 +666,7 @@ def value_at_coordinate(self, coordinates, tolerance=1, position=True): else: return score + utilities.Grid = Grid diff --git a/build/lib/hotspots/hs_docking.py b/build/lib/hotspots/hs_docking.py index d5387e5d..a31ce948 100644 --- a/build/lib/hotspots/hs_docking.py +++ b/build/lib/hotspots/hs_docking.py @@ -84,7 +84,7 @@ def __init__(self, atoms, weight=5.0, min_hbond_score=0.001, _constraint=None): super(self.__class__, self).__init__(_constraint) @staticmethod - def from_file(path, protein, weight, min_hbond_score=0.2, max=2): + def from_file(path, protein, weight, min_hbond_score=0.001, max=2): """ create a hotspot constraint from file diff --git a/build/lib/hotspots/hs_pharmacophore.py b/build/lib/hotspots/hs_pharmacophore.py index b96f0e7a..e9a02000 100644 --- a/build/lib/hotspots/hs_pharmacophore.py +++ b/build/lib/hotspots/hs_pharmacophore.py @@ -109,6 +109,28 @@ def __init__(self, settings, identifier=None, features=None, protein=None, dic=N self.dic = dic + def _usr_moment(self): + """ + generates USR moments for shape analysis + :return: + """ + try: + from hotspots.hs_utilities import _generate_usr_moment + type_dic = {"apolar" : [], + "acceptor": [], + "donor": [], + "positive": [], + "negative": [] + } + + for feat in self.features: + type_dic[feat.feature_type].append(feat.feature_coordinates) + + coords_list = [np.array(v) for v in type_dic.values() if len(v) != 0] + return _generate_usr_moment(fcoords_list=coords_list) + except ImportError: + print("To use this feature you must have USR installed") + @property def features(self): """ @@ -372,6 +394,9 @@ def _get_ligands(results): def _cluster_ligands(n, ligands): """ !!! OUT OF DATE !!! + + + Use clustering in cluster_img.py (Not public yet) :return: """ cluster_dict = {a: [] for a in range(int(n))} diff --git a/build/lib/hotspots/hs_utilities.py b/build/lib/hotspots/hs_utilities.py index bd1c3000..4ee97997 100644 --- a/build/lib/hotspots/hs_utilities.py +++ b/build/lib/hotspots/hs_utilities.py @@ -25,9 +25,45 @@ from ccdc.io import MoleculeWriter from ccdc.molecule import Molecule, Atom + Coordinates = collections.namedtuple('Coordinates', ['x', 'y', 'z']) +def _generate_usr_moment(fcoords_list): + """ + PRIVATE EXPERIMENTAL FEATURE + + Modification of Adrian Schreyer's code https://bitbucket.org/aschreyer/usrcat + + More information on the USR algorithm: + - https://doi.org/10.1098/rspa.2007.1823 + + :param grds: + + :return: + """ + from usrcat.geometry import usr_moments, usr_moments_with_existing + + all_coords = np.concatenate([c for c in fcoords_list if len(c) != 0]) + #np.vstack(fcoords_list) + (ctd, cst, fct, ftf), om = usr_moments(all_coords) + + for fcoords in fcoords_list: + + # initial zeroed out USRCAT feature moments + fm = np.zeros(12) + + # only attempt to generate moments if there are enough atoms available! + if len(fcoords): + fm = usr_moments_with_existing(fcoords, ctd, cst, fct, ftf) + + # append feature moments to the existing ones + om = np.append(om, fm) + + return np.array([om]) + + + class Helper(object): """ A class to handle miscellaneous functionality diff --git a/build/lib/hotspots/result.py b/build/lib/hotspots/result.py index 9ce7cddf..d696787d 100644 --- a/build/lib/hotspots/result.py +++ b/build/lib/hotspots/result.py @@ -47,9 +47,9 @@ from hotspots.grid_extension import Grid, _GridEnsemble from hotspots.hs_utilities import Figures from hotspots.template_strings import pymol_imports -from hs_pharmacophore import PharmacophoreModel -from hs_utilities import Helper -from atomic_hotspot_calculation import AtomicHotspot, _AtomicHotspotResult +from hotspots.hs_utilities import Helper +from hotspots.hs_pharmacophore import PharmacophoreModel +from hotspots.atomic_hotspot_calculation import AtomicHotspot, _AtomicHotspotResult class _Scorer(Helper): @@ -60,6 +60,7 @@ class _Scorer(Helper): :param obj: either `ccdc.molecule.Molecule` or `ccdc.protein.Protein` :param int tolerance: search distance """ + def __init__(self, hotspot_result, obj, tolerance): self.hotspot_result = hotspot_result self.object = obj @@ -151,6 +152,7 @@ def _score_protein_backup(self, prot): :return: """ + def fetch_scores(atom, grid, tolerance=4): try: return max(self.hotspot_result.super_grids[grid].get_near_scores(coordinate=atom.coordinates, @@ -222,11 +224,9 @@ def score_molecule(self): def _score_feature(self, f): - ideal_coord = (f.coordinates[n] + 1.8*(f.protein_vector[n]) for n in xrange(0,2)) + ideal_coord = (f.coordinates[n] + 1.8 * (f.protein_vector[n]) for n in xrange(0, 2)) print(ideal_coord) - - def score_cavity(self): # TODO: return scored cavity _features, the score protein function should be enough tbh cav = copy.copy(self.scored_object) @@ -234,7 +234,6 @@ def score_cavity(self): for f in cav.features: self._score_feature(f) - def score_hotspot(self, threshold=5, percentile=50): """ Hotspot scored on the median value of all points included in the hotspot. @@ -343,14 +342,26 @@ def __init__(self, super_grids, protein, buriedness=None, pharmacophore=None): if pharmacophore: self.pharmacophore = self.get_pharmacophore_model() - class ConstraintData(object): """ standardise constrain read and write (required for the GOLD optimisation) """ - def __init__(self, score_by_index): + + def __init__(self, score_by_index, prot=None): self.score_by_index = OrderedDict(score_by_index) + self.protein = prot + + def to_molecule(self, protein=None): + if self.protein is None: + if protein is None: + raise AttributeError("Give me a protein") + mol = Molecule(identifier="constraints") + for score, index in self.score_by_index.items(): + atm = self.protein.atoms[index] + atm.label = str(score) + mol.add_atom(atm) + return mol def write(self, path): f = open(path, "wb") @@ -512,7 +523,7 @@ def filter_point(x, y, z): [g[x + i][y + j][z + k] for i in range(-tol, tol + 1) for j in range(-tol, tol + 1) for k in range(-tol, tol + 1)]) if loc_arr[loc_arr > 0].size != 0: - #print(np.mean(loc_arr[loc_arr > 0])) + # print(np.mean(loc_arr[loc_arr > 0])) new_grid[x][y][z] = np.mean(loc_arr[loc_arr > 0]) vfilter_point = np.vectorize(filter_point) @@ -666,34 +677,117 @@ def docking_fitting_pts(self, _best_island=None, threshold=17): mol.add_atom(atom=atm) return mol - def docking_constraint_atoms(self, max_constraints=5): + def _output_feature_centroid(self): + dic = {"apolar": "C", + "acceptor": "N", + "donor": "O"} + mol = Molecule(identifier="centroids") + for i, feat in enumerate(self.features): + coords = feat.grid.centroid() + mol.add_atom(Atom(atomic_symbol=dic[feat.feature_type], + atomic_number=14, + coordinates=coords, + label=str(i))) + from ccdc import io + with io.MoleculeWriter("cenroid.mol2") as w: + w.write(mol) + + def docking_constraint_atoms(self, max_constraints=10, accessible_cutoff=0.001, max_distance=4): """ creates a dictionary of constraints :param int max_constraints: max number of constraints :return dic: score by atom """ - for atm in self.protein.atoms: - atm.partial_charge = int(0) - prot = self.score(self.protein) - coords = np.array([a.coordinates for a in prot.atoms]) - atm_dic = {atm.partial_charge: atm.index for atm in prot.atoms - if type(atm.partial_charge) is float - and ((atm.atomic_number == 1 and atm.neighbours[0].is_donor) or atm.is_acceptor) - and self._is_solvent_accessible(coords, atm, min_distance=2) - } + from hotspots.hs_utilities import Helper + from scipy.spatial import distance + + point = self.super_grids['apolar'].centroid() + bs = self.protein.BindingSiteFromPoint(protein=self.protein, origin=point, distance=12.) + p = self.protein.copy() + for a in p.atoms: + a.label = str(a.index) + + remove = set(p.residues) - set(bs.residues) + + for r in remove: + p.remove_residue(r.identifier) - if len(atm_dic) > max_constraints: - scores = sorted([f[0] for f in atm_dic.items()], reverse=True)[:max_constraints] + donors = {a.coordinates: a.label for a in p.atoms + if (a.atomic_number == 1 and a.neighbours[0].is_donor and a.solvent_accessible_surface() > accessible_cutoff) + } + + acceptors = {a.coordinates: a.label for a in p.atoms + if a.is_acceptor and a.solvent_accessible_surface() > accessible_cutoff + } + pairs = {"acceptor": donors, + "donor": acceptors} + + constraint_dic = {} + + for feature in self.features: + centoid = [feature.grid.centroid()] + coords = pairs[feature.feature_type].keys() + all_distances = distance.cdist(coords, centoid, 'euclidean') + ind = int(np.argmin(all_distances)) + min_coord = coords[ind] + atm_index = int(pairs[feature.feature_type][min_coord]) + + if all_distances[ind] < max_distance: + constraint_dic.update({feature.score_value: atm_index}) + + if len(constraint_dic) > max_constraints: + scores = sorted([f[0] for f in constraint_dic.items()], reverse=True)[:max_constraints] else: - scores = sorted([f[0] for f in atm_dic.items()], reverse=True) + scores = sorted([f[0] for f in constraint_dic.items()], reverse=True) - bin_keys = set(atm_dic.keys()) - set(scores) + bin_keys = set(constraint_dic.keys()) - set(scores) for b in bin_keys: - del atm_dic[b] + del constraint_dic[b] + + return self.ConstraintData(constraint_dic, self.protein) + + # for atm in self.protein.atoms: + # atm.partial_charge = int(0) + # prot = self.score(self.protein) + # + # coords = np.array([a.coordinates for a in prot.atoms]) + # atm_dic = {atm.partial_charge: atm.index for atm in prot.atoms + # if type(atm.partial_charge) is float + # and ((atm.atomic_number == 1 and atm.neighbours[0].is_donor) or atm.is_acceptor) + # and self._is_solvent_accessible(coords, atm, min_distance=2) + # } + # + # if len(atm_dic) > max_constraints: + # scores = sorted([f[0] for f in atm_dic.items()], reverse=True)[:max_constraints] + # else: + # scores = sorted([f[0] for f in atm_dic.items()], reverse=True) + # + # bin_keys = set(atm_dic.keys()) - set(scores) + # for b in bin_keys: + # del atm_dic[b] + # + # return self.ConstraintData(atm_dic, self.protein) + + def _usr_moment(self, threshold=14): + """ + PRIVATE + This is some experimental code and requires seperate installation of USR + generates USR moments for shape analysis + :return: + """ + try: + from hs_utilities import _generate_usr_moment + + coords_list = [np.array(g.coordinates(threshold=threshold)) + for p, g in self.super_grids.items() + if p != "negative" and p != "positive"] - return self.ConstraintData(atm_dic) + return _generate_usr_moment(fcoords_list=coords_list) + + except ImportError: + print("To use this feature you must have USR installed") def map_values(self): """ @@ -797,8 +891,8 @@ def _get_superstar_profile(self, feature_radius=1.5, nthreads=6, features=None, g = ss_dict[shortest] feat.superstar_results.append(_AtomicHotspotResult(identifier=r.identifier, - grid=g, - buriedness=None) + grid=g, + buriedness=None) ) score.append(g.grid_score(threshold=1, percentile=50)) @@ -1229,8 +1323,8 @@ def _get_interaction_type(self, mask_dic, best_island, threshold, settings): features_in_vol = {p: g * (g & common_best_island) for p, g in mask_dic.items()} location = features_in_vol["apolar"] features = self.hotspot_result._get_features(interaction_dict=features_in_vol, - threshold=threshold, - min_feature_gp=settings.min_feature_gp) + threshold=threshold, + min_feature_gp=settings.min_feature_gp) return location, features @staticmethod @@ -1311,7 +1405,6 @@ def _get_grid_dict(location, features, settings): # # feat.superstar_profile = super_profile - @property def single_grid(self): return self._single_grid @@ -1451,7 +1544,7 @@ def _rank_extracted_hotspots(self): self.extracted_hotspots = [value for (key, value) in sorted(extracted_hotspots_by_rank.items())] for i, hs in enumerate(self.extracted_hotspots): - hs.identifier = hs.score_value #"rank_{}".format(hs.rank) + hs.identifier = hs.score_value # "rank_{}".format(hs.rank) print("rank", hs.rank, "score", hs.score_value) def _select_cavity_grids(self, cavs): diff --git a/examples/1_tractibility/tractability.png b/examples/1_tractibility/tractability.png index 724ef751..20fa89dc 100644 Binary files a/examples/1_tractibility/tractability.png and b/examples/1_tractibility/tractability.png differ diff --git a/examples/1_tractibility/visualise.py b/examples/1_tractibility/visualise.py index cdd1dd85..46447d5c 100644 --- a/examples/1_tractibility/visualise.py +++ b/examples/1_tractibility/visualise.py @@ -9,8 +9,11 @@ sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)}, font_scale=6) -pal = [(117/255, 216/255, 213/255), - (211/255, 60/255, 96/255)] +# pal = [(117/255, 216/255, 213/255), +# (211/255, 60/255, 96/255)] + +pal = [(202/255, 41/255, 54/255), + (140/255, 157/255, 196/255)] # Get Data fname = "scores.csv" diff --git a/hotspots/excluded_het_id.txt b/hotspots/excluded_het_id.txt new file mode 100644 index 00000000..e925af38 --- /dev/null +++ b/hotspots/excluded_het_id.txt @@ -0,0 +1,298 @@ +3CO +BUQ +NAG +NAD +CR +SF4 +EOH +ZNO +NAO +EOM +EHN +NAP +CCN +NAW +BR +EGL +NI2 +GSH +NI1 +O2 +BA +RU +SAH +GU7 +SAM +TAS +DCE +2BM +TP7 +OF3 +OF1 +RB +IOH +MW1 +IOD +C2O +BNZ +TCN +ARS +NH4 +GD +PER +GA +TPP +CHX +CME +THB +IPA +CD1 +OH +SO4 +DTT +PQN +CYN +PQQ +PYJ +PEO +NA6 +MBR +NA5 +OS +MAN +CMO +OCL +DMF +OCN +MO3 +NGN +ACT +U1 +HDZ +MO5 +MO4 +VO4 +DMS +FUC +PCL +CB5 +EEE +HG +NO2 +CMP +PR +BMA +IUM +PT +ZN2 +TTP +NO3 +YT3 +TYS +PB +M2M +ZO3 +PD +AMP +PI +MH3 +AF3 +ZN +MN3 +OXY +NI +CSD +OX +PS5 +MN5 +MN6 +S +HOH +W +SB +FOL +OXE +PT4 +PBM +O +MW2 +MG +543 +MSM +C5P +ANL +MTO +NO +TBU +OPY +PC4 +GU3 +GU2 +GU1 +MOH +ANP +GU6 +GU5 +GU4 +AU +OC3 +BTN +I42 +OC4 +OC7 +OC6 +TMP +RE +GD3 +CTP +ACE +3OF +ETZ +MM4 +IN +ACN +DOD +AST +COA +EU +DOX +COB +B12 +REO +ATP +CD3 +U10 +ACY +PEG +YB +NDP +NBZ +ETI +SER +C2C +NA +FMT +ASC +AU3 +FE2 +LNK +SEK +MO1 +EU3 +1BO +AUC +CLO +FE +DUM +ADP +OF2 +BEF +FEL +BF4 +HEX +CUZ +NDG +XE +FMN +YAN +CUA +V +CUO +HEM +GMP +CU +MGF +GDP +CFT +SBT +PLP +SR +FU1 +EDN +EDO +H2S +ND4 +BRO +KR +CS +NME +CDP +HGI +SM +ALY +NMO +TDP +SE +HO +3CN +AZI +F42 +FLO +6MO +EMC +Y1 +MO7 +SE4 +BF2 +CO +NGD +2MO +202 +DIS +MBN +LA +PGO +CL +HP6 +SO2 +LI +PPS +TPO +POL +GU0 +SGM +DTU +MOO +TE +TB +CA +FAD +CNV +GOL +SCN +AG +PO4 +IR +DIO +NH2 +8CL +3NI +IRI +UTP +AR +N4M +CE +NH3 +MN +CNN +HGC +GU8 +GTP +UDP +OC2 +ART +TFH +MCH +2NO +6WO +CD5 +KCX +E1H +ARF +TL +DXE +GU9 +IDO +KO4 +NRU +4MO diff --git a/hotspots/hs_pharmacophore.py b/hotspots/hs_pharmacophore.py index a239511c..36d163f7 100644 --- a/hotspots/hs_pharmacophore.py +++ b/hotspots/hs_pharmacophore.py @@ -30,8 +30,8 @@ from __future__ import print_function import csv import json -import os import re +import os import tempfile from os.path import basename, splitext, join, dirname @@ -48,11 +48,26 @@ from hotspots.pdb_python_api import Query, PDB, PDBResult from rdkit import Chem, DataStructs -# from rdkit.Chem import MACCSkey, AllChem, Draw -from rdkit.Chem import MACCSkeys +from rdkit.Chem import MACCSkeys, AllChem from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score from tqdm import tqdm +import numba +from sklearn.manifold import TSNE +import matplotlib.pyplot as plt +import hdbscan + +@numba.njit() +def tanimoto_dist(a, b): + """ + calculate the tanimoto distance between two fingerprint arrays + :param a: + :param b: + :return: + """ + dotprod = np.dot(a, b) + tc = dotprod / (np.sum(a) + np.sum(b) - dotprod) + return 1.0 - tc class PharmacophoreModel(Helper): @@ -116,7 +131,7 @@ def _usr_moment(self): """ try: from hotspots.hs_utilities import _generate_usr_moment - type_dic = {"apolar" : [], + type_dic = {"apolar": [], "acceptor": [], "donor": [], "positive": [], @@ -140,6 +155,21 @@ def features(self): """ return self._features + def _comparision_dict(self): + """ + converts pharmacophore into comparision dictionary + + :return: dic + """ + d = {} + self.rank_features(max_features=20) + rank_dic = {"apolar": 0, "donor": 0, "acceptor": 0, "negative":0, "positive":0} + for feat in self._features: + d.update({feat.score_value: [feat.feature_type, feat.feature_coordinates, rank_dic[feat.feature_type]]}) + rank_dic[feat.feature_type] += 1 + + return d + def rank_features(self, max_features=4, feature_threshold=0, force_apolar=True): """ orders features by score @@ -264,7 +294,7 @@ def _as_grid(self, feature_type=None, tolerance=2): grd = Grid(origin=origin, far_corner=far_corner, spacing=0.5, default=0, _grid=None) for feat in filtered_features: - grd.set_sphere(point=feat.feature_coordinates,radius=self.settings.radius, value=1,scaling='None') + grd.set_sphere(point=feat.feature_coordinates, radius=self.settings.radius, value=1, scaling='None') return grd def _get_binding_site_residues(self): @@ -299,7 +329,8 @@ def _get_crossminer_pharmacophore(self): except: raise ImportError("Crossminer is only available to CSD-Discovery") - feature_definitions = {supported_features[fd.identifier]: fd for fd in Pharmacophore.feature_definitions.values() + feature_definitions = {supported_features[fd.identifier]: fd for fd in + Pharmacophore.feature_definitions.values() if fd.identifier in supported_features.keys()} model_features = [] @@ -381,41 +412,70 @@ def _get_ligands(results): for l in entry.filtered_ligands: try: l.rdmol = Chem.MolFromSmiles(l.smiles) + AllChem.Compute2DCoords(l.rdmol) l.rdmol.SetProp("_Name", str(entry.identifier + "/" + l.chemical_id)) - l.fingerprint = MACCSkeys.GenMACCSKeys(l.rdmol) + # l.fingerprint = MACCSkeys.GenMACCSKeys(l.rdmol) + l.fingerprint = AllChem.GetMorganFingerprintAsBitVect(l.rdmol, 2) if l.chemical_id not in uniques: ligs.append(l) uniques.append(l.chemical_id) - except AttributeError: + except: continue return ligs @staticmethod - def _cluster_ligands(n, ligands): + def _cluster_ligands(ligands, t): """ - !!! OUT OF DATE !!! - - Use clustering in cluster_img.py (Not public yet) :return: """ - cluster_dict = {a: [] for a in range(int(n))} - num = len(ligands) - sim = np.zeros((num, num)) - for i in range(num): - for j in range(num): - sim[i, j] = DataStructs.FingerprintSimilarity(ligands[i].fingerprint, - ligands[j].fingerprint) + def fingerprint_array(ligands): + X =[] + for l in ligands: + arr = np.zeros((0,)) + DataStructs.ConvertToNumpyArray(l.fingerprint, arr) + X.append(arr) + return X + + cluster_dic = {} + + # generate fingerprint array + X = fingerprint_array(ligands) + if len(X) < 2: + X = fingerprint_array(ligands) + if len(X) < 2: + raise ValueError("Fingerprint array must contain more than 1 entry") - kmeans_model = KMeans(n_clusters=n, random_state=1).fit(sim) - labels = kmeans_model.labels_ - s = silhouette_score(sim, labels, metric='euclidean') + # dimensionality reduction + tsne_X = TSNE(n_components=3, metric=tanimoto_dist).fit_transform(np.array(X, dtype=np.float32)) + + # clustering + cluster_tsne = hdbscan.HDBSCAN(min_cluster_size=2, gen_min_span_tree=True) + cluster_tsne.fit(tsne_X) + + for i, label in enumerate(cluster_tsne.labels_): + if label == -1: + continue + else: + if label in cluster_dic: + cluster_dic[label].append(ligands[i]) + else: + cluster_dic.update({label: [ligands[i]]}) - for i, ident in enumerate(kmeans_model.labels_): - ligands[i].cluster_id = ident - cluster_dict[int(ident)].append(ligands[i]) + x = [tsne_X.T[0][j] for j, l in enumerate(cluster_tsne.labels_) if l != -1] + y = [tsne_X.T[1][j] for j, l in enumerate(cluster_tsne.labels_) if l != -1] + hue = [l for j, l in enumerate(cluster_tsne.labels_) if l != -1] - return cluster_dict, s + plt.scatter(x, y, c=hue, cmap='RdBu') + + plt.title("{} clusters".format(t)) + plt.savefig("{}.png".format(t)) + plt.close() + if len(cluster_dic) == 0: + print("NO CLUSTERS FOUND") + cluster_dic = {i: [ligands[i]] for i in range(0, len(ligands))} + + return cluster_dic @staticmethod def _align_proteins(reference, reference_chain, targets): @@ -438,6 +498,7 @@ def _align_proteins(reference, reference_chain, targets): prot = Protein.from_file(t.fname) prot.detect_ligand_bonds() prot.add_hydrogens() + chain = None for l in prot.ligands: if str(t.clustered_ligand) == str(l.identifier.split(":")[1][0:3]): try: @@ -446,14 +507,11 @@ def _align_proteins(reference, reference_chain, targets): distance=6) chain = bs.residues[0].identifier.split(":")[0] except: - break - break + chain = None - else: - continue - if not chain: + if chain is None: print("\n {} failed! No chain detected".format(t.identifier)) - break + continue try: binding_site_superposition = Protein.ChainSuperposition() (bs_rmsd, bs_transformation) = binding_site_superposition.superpose(reference[reference_chain], @@ -463,7 +521,7 @@ def _align_proteins(reference, reference_chain, targets): if str(t.clustered_ligand) == str(lig.identifier.split(":")[1][0:3]): if chain == str(lig.identifier.split(":")[0]): aligned_ligands.append(lig) - except IndexError: + except: print("\n {} failed!".format(t.identifier)) continue @@ -498,12 +556,12 @@ def write(self, fname): for feature in self._features: line += "{0},{1},{2},{3},{4},{5}".format(self.identifier, - feature.feature_type, - feature.feature_coordinates.x, - feature.feature_coordinates.y, - feature.feature_coordinates.z, - feature.score_value - ) + feature.feature_type, + feature.feature_coordinates.x, + feature.feature_coordinates.y, + feature.feature_coordinates.z, + feature.score_value + ) if feature.projected_coordinates: line += ",{0},{1},{2}".format(feature.projected_coordinates.x, feature.projected_coordinates.y, @@ -532,7 +590,7 @@ def write(self, fname): pymol_file.write(pymol_out) label = self.get_label(self) - with io.MoleculeWriter(join(dirname(fname),lfile)) as writer: + with io.MoleculeWriter(join(dirname(fname), lfile)) as writer: writer.write(label) elif extension == ".json": @@ -582,7 +640,7 @@ def write(self, fname): pharmit_file.write(json.dumps({"points": pts})) elif extension == ".mol2": - mol = Molecule(identifier = "pharmacophore_model") + mol = Molecule(identifier="pharmacophore_model") atom_dic = {"apolar": 'C', "donor": 'N', "acceptor": 'O', @@ -595,7 +653,7 @@ def write(self, fname): label=str(feat.score_value)) for feat in self.features] - for a in pseudo_atms: + for a in pseudo_atms: mol.add_atom(a) with io.MoleculeWriter(fname) as w: @@ -664,8 +722,9 @@ def _from_file(fname, protein=None, identifier=None, settings=None): with open(fname) as f: file = f.read().split("FEATURE_LIBRARY_END")[1] - lines = [l for l in file.split("""\n\n""") if l != ""] - feature_list = [f for f in [_PharmacophoreFeature.from_crossminer(feature) for feature in lines] if f != None] + lines = [l for l in file.split("""\r\n\r\n""") if l != ""] + feature_list = [f for f in [_PharmacophoreFeature.from_crossminer(feature) for feature in lines] if + f != None] return PharmacophoreModel(settings, identifier=identifier, @@ -731,7 +790,7 @@ def from_ligands(ligands, identifier, protein=None, settings=None): all_feats = [f for l in detected for f in l] if not all_feats: - continue + continue for f in all_feats: feature_dic[cm_dic[fd.identifier]].set_sphere(f.spheres[0].centre, f.spheres[0].radius, 1) @@ -752,7 +811,8 @@ def from_ligands(ligands, identifier, protein=None, settings=None): feature_type=feat, feature_coordinates=coords, projected_coordinates=projected_coordinates, - score_value=feature_grd.value_at_coordinate(coords, position=False), + score_value=feature_grd.value_at_coordinate(coords, + position=False), vector=None, settings=settings ) @@ -763,6 +823,7 @@ def from_ligands(ligands, identifier, protein=None, settings=None): features=features, protein=protein, dic=feature_dic) + @staticmethod def _to_file(ligands, out_dir, fname="ligands.dat"): with open(os.path.join(out_dir, fname), "w") as f: @@ -770,7 +831,7 @@ def _to_file(ligands, out_dir, fname="ligands.dat"): f.write("{},{},{}\n".format(l.structure_id, l.chemical_id, l.smiles)) @staticmethod - def from_pdb(pdb_code, chain, out_dir=None, representatives=None, identifier="LigandBasedPharmacophore"): + def from_pdb(pdb_code, chain, representatives=None, identifier="LigandBasedPharmacophore"): """ creates a Pharmacophore Model from a PDB code. @@ -808,7 +869,7 @@ def from_pdb(pdb_code, chain, out_dir=None, representatives=None, identifier="Li temp = tempfile.mkdtemp() ref = PDBResult(pdb_code) ref.download(out_dir=temp, compressed=False) - + all_ligands = None if representatives: print("Reading representative PDB codes ...") reps = [] @@ -822,15 +883,10 @@ def from_pdb(pdb_code, chain, out_dir=None, representatives=None, identifier="Li else: accession_id = PDBResult(pdb_code).protein.sub_units[0].accession_id results = PharmacophoreModel._run_query(accession_id) - ligands = PharmacophoreModel._get_ligands(results) + # return all_ligands + all_ligands = PharmacophoreModel._get_ligands(results) - top = os.path.dirname(os.path.dirname(out_dir)) - PharmacophoreModel._to_file(ligands, top, fname="all_ligands.dat") - - k = int(round(len(ligands) / 5)) - if k < 2: - k = 2 - cluster_dict, s = PharmacophoreModel._cluster_ligands(n=k, ligands=ligands) + cluster_dict = PharmacophoreModel._cluster_ligands(ligands=all_ligands, t=identifier) reps = [l[0] for l in cluster_dict.values() if len(l) != 0] targets = [] @@ -854,21 +910,12 @@ def from_pdb(pdb_code, chain, out_dir=None, representatives=None, identifier="Li reference_chain=chain, targets=targets) - try: - with open(os.path.join(top, "silhouette.dat"), "w") as sfile: - sfile.write("{}".format(s)) - PharmacophoreModel._to_file(reps, top, fname="representatives.dat") - - except: - pass - - if out_dir: - with io.MoleculeWriter(os.path.join(out_dir, "aligned_mols.mol2")) as w: - for l in ligands: - w.write(l) + p = PharmacophoreModel.from_ligands(ligands=ligands, identifier=identifier) + p.all_ligands = all_ligands + p.representatives = reps + p.aligned_ligands = ligands - return PharmacophoreModel.from_ligands(ligands=ligands, - identifier=identifier) + return p class _PharmacophoreFeature(Helper): @@ -884,7 +931,9 @@ class _PharmacophoreFeature(Helper): :param settings: """ - def __init__(self, projected, feature_type, feature_coordinates, projected_coordinates, score_value, vector, settings): + + def __init__(self, projected, feature_type, feature_coordinates, projected_coordinates, score_value, vector, + settings): self._projected = projected self._feature_type = feature_type self._feature_coordinates = feature_coordinates @@ -921,7 +970,6 @@ def score_value(self): def vector(self): return self._vector - @staticmethod def from_hotspot(grid, probe, protein, settings): """ @@ -986,13 +1034,14 @@ def from_crossminer(feature_str): vector = None settings = PharmacophoreModel.Settings() - feat = re.search(r"""PHARMACOPHORE_FEATURE (.+?)\n""", feature_str) + feat = re.search(r"""PHARMACOPHORE_FEATURE (.+?)\r""", feature_str) if feat.group(1) == "excluded_volume": pass else: - feature_type = cm_feature_dict[feat.group(1)] + feature_type = cm_feature_dict[feat.group(1)] - spher = re.findall("""PHARMACOPHORE_SPHERE (.+?)\n""", feature_str) + spher = re.findall("""PHARMACOPHORE_SPHERE (.+?)\r""", feature_str) + print(spher) if len(spher) == 1: coords = spher[0].split(" ") @@ -1003,7 +1052,7 @@ def from_crossminer(feature_str): elif len(spher) == 2: coords = spher[0].split(" ") - proj = spher[1].split(" ") + proj = spher[1].split(" ") feature_coordinates = Coordinates(float(coords[0]), float(coords[1]), float(coords[2])) projected = True projected_coordinates = Coordinates(float(proj[0]), float(proj[1]), float(proj[2])) @@ -1012,7 +1061,8 @@ def from_crossminer(feature_str): else: raise IOError("feature format not recognised") - return _PharmacophoreFeature(projected, feature_type, feature_coordinates, projected_coordinates, score, vector, + return _PharmacophoreFeature(projected, feature_type, feature_coordinates, projected_coordinates, score, + vector, settings) @staticmethod @@ -1044,7 +1094,7 @@ def get_projected_coordinates(feature_type, feature_coordinates, protein, settin if dist in near_atoms.keys(): near_atoms[dist].append(atm) else: - near_atoms.update({dist:[atm]}) + near_atoms.update({dist: [atm]}) else: continue if len(near_atoms.keys()) == 0: @@ -1069,9 +1119,9 @@ def get_maxima(grid): coords = grid.indices_to_point(indices[0][0], indices[0][1], indices[0][2]) return max_value, Coordinates(coords[0], coords[1], coords[2]) else: - coords = grid.indices_to_point(round(sum(i[0] for i in indices)/len(indices)), - round(sum(j[1] for j in indices)/len(indices)), - round(sum(k[2] for k in indices)/len(indices)) + coords = grid.indices_to_point(round(sum(i[0] for i in indices) / len(indices)), + round(sum(j[1] for j in indices) / len(indices)), + round(sum(k[2] for k in indices) / len(indices)) ) return max_value, Coordinates(coords[0], coords[1], coords[2]) @@ -1099,9 +1149,9 @@ def get_centroid(grid): total_mass += grid_val coords = Coordinates(np.divide(weighted_x, total_mass), - np.divide(weighted_y, total_mass), - np.divide(weighted_z, total_mass) - ) + np.divide(weighted_y, total_mass), + np.divide(weighted_z, total_mass) + ) score = grid.value_at_point(coords) return float(score), coords diff --git a/hotspots/hs_utilities.py b/hotspots/hs_utilities.py index ea46f1a7..b9a333c0 100644 --- a/hotspots/hs_utilities.py +++ b/hotspots/hs_utilities.py @@ -15,8 +15,8 @@ from os import mkdir from os.path import exists, join, abspath -import matplotlib as mpl -mpl.use('TkAgg') +# import matplotlib as mpl +# mpl.use('TkAgg') import matplotlib.pyplot as plt import numpy as np diff --git a/materials/fragment-hotspot-maps-2019.pdf b/materials/fragment-hotspot-maps-2019.pdf new file mode 100644 index 00000000..7cc6a8c0 Binary files /dev/null and b/materials/fragment-hotspot-maps-2019.pdf differ diff --git a/materials/fragment-hotspot-maps-2019.pptx b/materials/fragment-hotspot-maps-2019.pptx new file mode 100644 index 00000000..8fbc7bd8 Binary files /dev/null and b/materials/fragment-hotspot-maps-2019.pptx differ diff --git a/materials/methodology.pptx b/materials/methodology.pptx new file mode 100644 index 00000000..db999635 Binary files /dev/null and b/materials/methodology.pptx differ