diff --git a/build/lib/hotspots/__init__.py b/build/lib/hotspots/__init__.py
index c8c5a079..3f15e846 100644
--- a/build/lib/hotspots/__init__.py
+++ b/build/lib/hotspots/__init__.py
@@ -5,7 +5,7 @@
 __copyright__ = None
 __credits__ = None
 __license__ = None
-__version__ = "1.0.0"
+__version__ = "1.0.2"
 __maintainer__ = "Peter Curran"
 __email__ = "pcurran@ccdc.cam.ac.uk"
 __status__ = "Development"
diff --git a/build/lib/hotspots/grid_extension.py b/build/lib/hotspots/grid_extension.py
index 45df5909..dd6a4063 100644
--- a/build/lib/hotspots/grid_extension.py
+++ b/build/lib/hotspots/grid_extension.py
@@ -38,6 +38,15 @@ class Grid(utilities.Grid):
     """
     A class to extend a `ccdc.utilities.Grid` this provides grid methods required in the Fragment Hotspot Maps algorithm
     """
+    def coordinates(self, threshold=1):
+        nx, ny, nz = self.nsteps
+        return [self.indices_to_point(i, j, k)
+                for i in range(nx)
+                for j in range(ny)
+                for k in range(nz)
+                if self.value(i, j, k) >= threshold]
+
+
     def grid_value_by_coordinates(self, threshold=1):
         """
         returns a dictionary of grid point values by coordinates
@@ -657,6 +666,7 @@ def value_at_coordinate(self, coordinates, tolerance=1, position=True):
         else:
             return score
 
+
 utilities.Grid = Grid
 
 
diff --git a/build/lib/hotspots/hs_docking.py b/build/lib/hotspots/hs_docking.py
index d5387e5d..a31ce948 100644
--- a/build/lib/hotspots/hs_docking.py
+++ b/build/lib/hotspots/hs_docking.py
@@ -84,7 +84,7 @@ def __init__(self, atoms, weight=5.0, min_hbond_score=0.001, _constraint=None):
             super(self.__class__, self).__init__(_constraint)
 
         @staticmethod
-        def from_file(path, protein, weight, min_hbond_score=0.2, max=2):
+        def from_file(path, protein, weight, min_hbond_score=0.001, max=2):
             """
             create a hotspot constraint from file
 
diff --git a/build/lib/hotspots/hs_pharmacophore.py b/build/lib/hotspots/hs_pharmacophore.py
index b96f0e7a..e9a02000 100644
--- a/build/lib/hotspots/hs_pharmacophore.py
+++ b/build/lib/hotspots/hs_pharmacophore.py
@@ -109,6 +109,28 @@ def __init__(self, settings, identifier=None, features=None, protein=None, dic=N
 
         self.dic = dic
 
+    def _usr_moment(self):
+        """
+        generates USR moments for shape analysis
+        :return:
+        """
+        try:
+            from hotspots.hs_utilities import _generate_usr_moment
+            type_dic = {"apolar" : [],
+                        "acceptor": [],
+                        "donor": [],
+                        "positive": [],
+                        "negative": []
+                        }
+
+            for feat in self.features:
+                type_dic[feat.feature_type].append(feat.feature_coordinates)
+
+            coords_list = [np.array(v) for v in type_dic.values() if len(v) != 0]
+            return _generate_usr_moment(fcoords_list=coords_list)
+        except ImportError:
+            print("To use this feature you must have USR installed")
+
     @property
     def features(self):
         """
@@ -372,6 +394,9 @@ def _get_ligands(results):
     def _cluster_ligands(n, ligands):
         """
         !!! OUT OF DATE !!!
+
+
+        Use clustering in cluster_img.py (Not public yet)
         :return:
         """
         cluster_dict = {a: [] for a in range(int(n))}
diff --git a/build/lib/hotspots/hs_utilities.py b/build/lib/hotspots/hs_utilities.py
index bd1c3000..4ee97997 100644
--- a/build/lib/hotspots/hs_utilities.py
+++ b/build/lib/hotspots/hs_utilities.py
@@ -25,9 +25,45 @@
 from ccdc.io import MoleculeWriter
 from ccdc.molecule import Molecule, Atom
 
+
 Coordinates = collections.namedtuple('Coordinates', ['x', 'y', 'z'])
 
 
+def _generate_usr_moment(fcoords_list):
+    """
+    PRIVATE EXPERIMENTAL FEATURE
+
+    Modification of Adrian Schreyer's code https://bitbucket.org/aschreyer/usrcat
+
+    More information on the USR algorithm:
+        - https://doi.org/10.1098/rspa.2007.1823
+
+    :param grds:
+
+    :return:
+    """
+    from usrcat.geometry import usr_moments, usr_moments_with_existing
+
+    all_coords = np.concatenate([c for c in fcoords_list if len(c) != 0])
+    #np.vstack(fcoords_list)
+    (ctd, cst, fct, ftf), om = usr_moments(all_coords)
+
+    for fcoords in fcoords_list:
+
+        # initial zeroed out USRCAT feature moments
+        fm = np.zeros(12)
+
+        # only attempt to generate moments if there are enough atoms available!
+        if len(fcoords):
+            fm = usr_moments_with_existing(fcoords, ctd, cst, fct, ftf)
+
+        # append feature moments to the existing ones
+        om = np.append(om, fm)
+
+    return np.array([om])
+
+
+
 class Helper(object):
     """
     A class to handle miscellaneous functionality
diff --git a/build/lib/hotspots/result.py b/build/lib/hotspots/result.py
index 9ce7cddf..d696787d 100644
--- a/build/lib/hotspots/result.py
+++ b/build/lib/hotspots/result.py
@@ -47,9 +47,9 @@
 from hotspots.grid_extension import Grid, _GridEnsemble
 from hotspots.hs_utilities import Figures
 from hotspots.template_strings import pymol_imports
-from hs_pharmacophore import PharmacophoreModel
-from hs_utilities import Helper
-from atomic_hotspot_calculation import AtomicHotspot, _AtomicHotspotResult
+from hotspots.hs_utilities import Helper
+from hotspots.hs_pharmacophore import PharmacophoreModel
+from hotspots.atomic_hotspot_calculation import AtomicHotspot, _AtomicHotspotResult
 
 
 class _Scorer(Helper):
@@ -60,6 +60,7 @@ class _Scorer(Helper):
     :param obj: either `ccdc.molecule.Molecule` or `ccdc.protein.Protein`
     :param int tolerance: search distance
     """
+
     def __init__(self, hotspot_result, obj, tolerance):
         self.hotspot_result = hotspot_result
         self.object = obj
@@ -151,6 +152,7 @@ def _score_protein_backup(self, prot):
 
         :return:
         """
+
         def fetch_scores(atom, grid, tolerance=4):
             try:
                 return max(self.hotspot_result.super_grids[grid].get_near_scores(coordinate=atom.coordinates,
@@ -222,11 +224,9 @@ def score_molecule(self):
 
     def _score_feature(self, f):
 
-        ideal_coord = (f.coordinates[n] + 1.8*(f.protein_vector[n]) for n in xrange(0,2))
+        ideal_coord = (f.coordinates[n] + 1.8 * (f.protein_vector[n]) for n in xrange(0, 2))
         print(ideal_coord)
 
-
-
     def score_cavity(self):
         # TODO: return scored cavity _features, the score protein function should be enough tbh
         cav = copy.copy(self.scored_object)
@@ -234,7 +234,6 @@ def score_cavity(self):
         for f in cav.features:
             self._score_feature(f)
 
-
     def score_hotspot(self, threshold=5, percentile=50):
         """
         Hotspot scored on the median value of all points included in the hotspot.
@@ -343,14 +342,26 @@ def __init__(self, super_grids, protein, buriedness=None, pharmacophore=None):
         if pharmacophore:
             self.pharmacophore = self.get_pharmacophore_model()
 
-
     class ConstraintData(object):
         """
         standardise constrain read and write (required for the GOLD optimisation)
 
         """
-        def __init__(self, score_by_index):
+
+        def __init__(self, score_by_index, prot=None):
             self.score_by_index = OrderedDict(score_by_index)
+            self.protein = prot
+
+        def to_molecule(self, protein=None):
+            if self.protein is None:
+                if protein is None:
+                    raise AttributeError("Give me a protein")
+            mol = Molecule(identifier="constraints")
+            for score, index in self.score_by_index.items():
+                atm = self.protein.atoms[index]
+                atm.label = str(score)
+                mol.add_atom(atm)
+            return mol
 
         def write(self, path):
             f = open(path, "wb")
@@ -512,7 +523,7 @@ def filter_point(x, y, z):
                 [g[x + i][y + j][z + k] for i in range(-tol, tol + 1) for j in range(-tol, tol + 1) for k in
                  range(-tol, tol + 1)])
             if loc_arr[loc_arr > 0].size != 0:
-                #print(np.mean(loc_arr[loc_arr > 0]))
+                # print(np.mean(loc_arr[loc_arr > 0]))
                 new_grid[x][y][z] = np.mean(loc_arr[loc_arr > 0])
 
         vfilter_point = np.vectorize(filter_point)
@@ -666,34 +677,117 @@ def docking_fitting_pts(self, _best_island=None, threshold=17):
                 mol.add_atom(atom=atm)
         return mol
 
-    def docking_constraint_atoms(self, max_constraints=5):
+    def _output_feature_centroid(self):
+        dic = {"apolar": "C",
+               "acceptor": "N",
+               "donor": "O"}
+        mol = Molecule(identifier="centroids")
+        for i, feat in enumerate(self.features):
+            coords = feat.grid.centroid()
+            mol.add_atom(Atom(atomic_symbol=dic[feat.feature_type],
+                              atomic_number=14,
+                              coordinates=coords,
+                              label=str(i)))
+        from ccdc import io
+        with io.MoleculeWriter("cenroid.mol2") as w:
+            w.write(mol)
+
+    def docking_constraint_atoms(self, max_constraints=10, accessible_cutoff=0.001, max_distance=4):
         """
         creates a dictionary of constraints
 
         :param int max_constraints: max number of constraints
         :return dic: score by atom
         """
-        for atm in self.protein.atoms:
-            atm.partial_charge = int(0)
-        prot = self.score(self.protein)
 
-        coords = np.array([a.coordinates for a in prot.atoms])
-        atm_dic = {atm.partial_charge: atm.index for atm in prot.atoms
-                   if type(atm.partial_charge) is float
-                   and ((atm.atomic_number == 1 and atm.neighbours[0].is_donor) or atm.is_acceptor)
-                   and self._is_solvent_accessible(coords, atm, min_distance=2)
-                   }
+        from hotspots.hs_utilities import Helper
+        from scipy.spatial import distance
+
+        point = self.super_grids['apolar'].centroid()
+        bs = self.protein.BindingSiteFromPoint(protein=self.protein, origin=point, distance=12.)
+        p = self.protein.copy()
+        for a in p.atoms:
+            a.label = str(a.index)
+
+        remove = set(p.residues) - set(bs.residues)
+
+        for r in remove:
+            p.remove_residue(r.identifier)
 
-        if len(atm_dic) > max_constraints:
-            scores = sorted([f[0] for f in atm_dic.items()], reverse=True)[:max_constraints]
+        donors = {a.coordinates: a.label for a in p.atoms
+                  if (a.atomic_number == 1 and a.neighbours[0].is_donor and a.solvent_accessible_surface() > accessible_cutoff)
+                  }
+
+        acceptors = {a.coordinates: a.label for a in p.atoms
+                     if a.is_acceptor and a.solvent_accessible_surface() > accessible_cutoff
+                     }
+        pairs = {"acceptor": donors,
+                 "donor": acceptors}
+
+        constraint_dic = {}
+
+        for feature in self.features:
+            centoid = [feature.grid.centroid()]
+            coords = pairs[feature.feature_type].keys()
+            all_distances = distance.cdist(coords, centoid, 'euclidean')
+            ind = int(np.argmin(all_distances))
+            min_coord = coords[ind]
+            atm_index = int(pairs[feature.feature_type][min_coord])
+
+            if all_distances[ind] < max_distance:
+                constraint_dic.update({feature.score_value: atm_index})
+
+        if len(constraint_dic) > max_constraints:
+            scores = sorted([f[0] for f in constraint_dic.items()], reverse=True)[:max_constraints]
         else:
-            scores = sorted([f[0] for f in atm_dic.items()], reverse=True)
+            scores = sorted([f[0] for f in constraint_dic.items()], reverse=True)
 
-        bin_keys = set(atm_dic.keys()) - set(scores)
+        bin_keys = set(constraint_dic.keys()) - set(scores)
         for b in bin_keys:
-            del atm_dic[b]
+            del constraint_dic[b]
+
+        return self.ConstraintData(constraint_dic, self.protein)
+
+            # for atm in self.protein.atoms:
+            #     atm.partial_charge = int(0)
+            # prot = self.score(self.protein)
+            #
+            # coords = np.array([a.coordinates for a in prot.atoms])
+            # atm_dic = {atm.partial_charge: atm.index for atm in prot.atoms
+            #            if type(atm.partial_charge) is float
+            #            and ((atm.atomic_number == 1 and atm.neighbours[0].is_donor) or atm.is_acceptor)
+            #            and self._is_solvent_accessible(coords, atm, min_distance=2)
+            #            }
+            #
+            # if len(atm_dic) > max_constraints:
+            #     scores = sorted([f[0] for f in atm_dic.items()], reverse=True)[:max_constraints]
+            # else:
+            #     scores = sorted([f[0] for f in atm_dic.items()], reverse=True)
+            #
+            # bin_keys = set(atm_dic.keys()) - set(scores)
+            # for b in bin_keys:
+            #     del atm_dic[b]
+            #
+            # return self.ConstraintData(atm_dic, self.protein)
+
+    def _usr_moment(self, threshold=14):
+        """
+        PRIVATE
+        This is some experimental code and requires seperate installation of USR
+        generates USR moments for shape analysis
+        :return:
+        """
+        try:
+            from hs_utilities import _generate_usr_moment
+
+            coords_list = [np.array(g.coordinates(threshold=threshold))
+                           for p, g in self.super_grids.items()
+                           if p != "negative" and p != "positive"]
 
-        return self.ConstraintData(atm_dic)
+            return _generate_usr_moment(fcoords_list=coords_list)
+
+        except ImportError:
+            print("To use this feature you must have USR installed")
 
     def map_values(self):
         """
@@ -797,8 +891,8 @@ def _get_superstar_profile(self, feature_radius=1.5, nthreads=6, features=None,
                     g = ss_dict[shortest]
 
                 feat.superstar_results.append(_AtomicHotspotResult(identifier=r.identifier,
-                                                                  grid=g,
-                                                                  buriedness=None)
+                                                                   grid=g,
+                                                                   buriedness=None)
                                               )
 
                 score.append(g.grid_score(threshold=1, percentile=50))
@@ -1229,8 +1323,8 @@ def _get_interaction_type(self, mask_dic, best_island, threshold, settings):
         features_in_vol = {p: g * (g & common_best_island) for p, g in mask_dic.items()}
         location = features_in_vol["apolar"]
         features = self.hotspot_result._get_features(interaction_dict=features_in_vol,
-                                         threshold=threshold,
-                                         min_feature_gp=settings.min_feature_gp)
+                                                     threshold=threshold,
+                                                     min_feature_gp=settings.min_feature_gp)
         return location, features
 
     @staticmethod
@@ -1311,7 +1405,6 @@ def _get_grid_dict(location, features, settings):
     #
     #         feat.superstar_profile = super_profile
 
-
     @property
     def single_grid(self):
         return self._single_grid
@@ -1451,7 +1544,7 @@ def _rank_extracted_hotspots(self):
         self.extracted_hotspots = [value for (key, value) in sorted(extracted_hotspots_by_rank.items())]
 
         for i, hs in enumerate(self.extracted_hotspots):
-            hs.identifier = hs.score_value #"rank_{}".format(hs.rank)
+            hs.identifier = hs.score_value  # "rank_{}".format(hs.rank)
             print("rank", hs.rank, "score", hs.score_value)
 
     def _select_cavity_grids(self, cavs):
diff --git a/examples/1_tractibility/tractability.png b/examples/1_tractibility/tractability.png
index 724ef751..20fa89dc 100644
Binary files a/examples/1_tractibility/tractability.png and b/examples/1_tractibility/tractability.png differ
diff --git a/examples/1_tractibility/visualise.py b/examples/1_tractibility/visualise.py
index cdd1dd85..46447d5c 100644
--- a/examples/1_tractibility/visualise.py
+++ b/examples/1_tractibility/visualise.py
@@ -9,8 +9,11 @@
 sns.set(style="white",
         rc={"axes.facecolor": (0, 0, 0, 0)},
         font_scale=6)
-pal = [(117/255, 216/255, 213/255),
-       (211/255, 60/255, 96/255)]
+# pal = [(117/255, 216/255, 213/255),
+#        (211/255, 60/255, 96/255)]
+
+pal = [(202/255, 41/255, 54/255),
+       (140/255, 157/255, 196/255)]
 
 # Get Data
 fname = "scores.csv"
diff --git a/hotspots/excluded_het_id.txt b/hotspots/excluded_het_id.txt
new file mode 100644
index 00000000..e925af38
--- /dev/null
+++ b/hotspots/excluded_het_id.txt
@@ -0,0 +1,298 @@
+3CO
+BUQ
+NAG
+NAD
+CR
+SF4
+EOH
+ZNO
+NAO
+EOM
+EHN
+NAP
+CCN
+NAW
+BR
+EGL
+NI2
+GSH
+NI1
+O2
+BA
+RU
+SAH
+GU7
+SAM
+TAS
+DCE
+2BM
+TP7
+OF3
+OF1
+RB
+IOH
+MW1
+IOD
+C2O
+BNZ
+TCN
+ARS
+NH4
+GD
+PER
+GA
+TPP
+CHX
+CME
+THB
+IPA
+CD1
+OH
+SO4
+DTT
+PQN
+CYN
+PQQ
+PYJ
+PEO
+NA6
+MBR
+NA5
+OS
+MAN
+CMO
+OCL
+DMF
+OCN
+MO3
+NGN
+ACT
+U1
+HDZ
+MO5
+MO4
+VO4
+DMS
+FUC
+PCL
+CB5
+EEE
+HG
+NO2
+CMP
+PR
+BMA
+IUM
+PT
+ZN2
+TTP
+NO3
+YT3
+TYS
+PB
+M2M
+ZO3
+PD
+AMP
+PI
+MH3
+AF3
+ZN
+MN3
+OXY
+NI
+CSD
+OX
+PS5
+MN5
+MN6
+S
+HOH
+W
+SB
+FOL
+OXE
+PT4
+PBM
+O
+MW2
+MG
+543
+MSM
+C5P
+ANL
+MTO
+NO
+TBU
+OPY
+PC4
+GU3
+GU2
+GU1
+MOH
+ANP
+GU6
+GU5
+GU4
+AU
+OC3
+BTN
+I42
+OC4
+OC7
+OC6
+TMP
+RE
+GD3
+CTP
+ACE
+3OF
+ETZ
+MM4
+IN
+ACN
+DOD
+AST
+COA
+EU
+DOX
+COB
+B12
+REO
+ATP
+CD3
+U10
+ACY
+PEG
+YB
+NDP
+NBZ
+ETI
+SER
+C2C
+NA
+FMT
+ASC
+AU3
+FE2
+LNK
+SEK
+MO1
+EU3
+1BO
+AUC
+CLO
+FE
+DUM
+ADP
+OF2
+BEF
+FEL
+BF4
+HEX
+CUZ
+NDG
+XE
+FMN
+YAN
+CUA
+V
+CUO
+HEM
+GMP
+CU
+MGF
+GDP
+CFT
+SBT
+PLP
+SR
+FU1
+EDN
+EDO
+H2S
+ND4
+BRO
+KR
+CS
+NME
+CDP
+HGI
+SM
+ALY
+NMO
+TDP
+SE
+HO
+3CN
+AZI
+F42
+FLO
+6MO
+EMC
+Y1
+MO7
+SE4
+BF2
+CO
+NGD
+2MO
+202
+DIS
+MBN
+LA
+PGO
+CL
+HP6
+SO2
+LI
+PPS
+TPO
+POL
+GU0
+SGM
+DTU
+MOO
+TE
+TB
+CA
+FAD
+CNV
+GOL
+SCN
+AG
+PO4
+IR
+DIO
+NH2
+8CL
+3NI
+IRI
+UTP
+AR
+N4M
+CE
+NH3
+MN
+CNN
+HGC
+GU8
+GTP
+UDP
+OC2
+ART
+TFH
+MCH
+2NO
+6WO
+CD5
+KCX
+E1H
+ARF
+TL
+DXE
+GU9
+IDO
+KO4
+NRU
+4MO
diff --git a/hotspots/hs_pharmacophore.py b/hotspots/hs_pharmacophore.py
index a239511c..36d163f7 100644
--- a/hotspots/hs_pharmacophore.py
+++ b/hotspots/hs_pharmacophore.py
@@ -30,8 +30,8 @@
 from __future__ import print_function
 import csv
 import json
-import os
 import re
+import os
 import tempfile
 from os.path import basename, splitext, join, dirname
 
@@ -48,11 +48,26 @@
 from hotspots.pdb_python_api import Query, PDB, PDBResult
 
 from rdkit import Chem, DataStructs
-# from rdkit.Chem import MACCSkey, AllChem, Draw
-from rdkit.Chem import MACCSkeys
+from rdkit.Chem import MACCSkeys, AllChem
 from sklearn.cluster import KMeans
 from sklearn.metrics import silhouette_score
 from tqdm import tqdm
+import numba
+from sklearn.manifold import TSNE
+import matplotlib.pyplot as plt
+import hdbscan
+
+@numba.njit()
+def tanimoto_dist(a, b):
+    """
+    calculate the tanimoto distance between two fingerprint arrays
+    :param a:
+    :param b:
+    :return:
+    """
+    dotprod = np.dot(a, b)
+    tc = dotprod / (np.sum(a) + np.sum(b) - dotprod)
+    return 1.0 - tc
 
 
 class PharmacophoreModel(Helper):
@@ -116,7 +131,7 @@ def _usr_moment(self):
         """
         try:
             from hotspots.hs_utilities import _generate_usr_moment
-            type_dic = {"apolar" : [],
+            type_dic = {"apolar": [],
                         "acceptor": [],
                         "donor": [],
                         "positive": [],
@@ -140,6 +155,21 @@ def features(self):
         """
         return self._features
 
+    def _comparision_dict(self):
+        """
+        converts pharmacophore into comparision dictionary
+
+        :return: dic
+        """
+        d = {}
+        self.rank_features(max_features=20)
+        rank_dic = {"apolar": 0, "donor": 0, "acceptor": 0, "negative":0, "positive":0}
+        for feat in self._features:
+            d.update({feat.score_value: [feat.feature_type, feat.feature_coordinates, rank_dic[feat.feature_type]]})
+            rank_dic[feat.feature_type] += 1
+
+        return d
+
     def rank_features(self, max_features=4, feature_threshold=0, force_apolar=True):
         """
         orders features by score
@@ -264,7 +294,7 @@ def _as_grid(self, feature_type=None, tolerance=2):
         grd = Grid(origin=origin, far_corner=far_corner, spacing=0.5, default=0, _grid=None)
 
         for feat in filtered_features:
-            grd.set_sphere(point=feat.feature_coordinates,radius=self.settings.radius, value=1,scaling='None')
+            grd.set_sphere(point=feat.feature_coordinates, radius=self.settings.radius, value=1, scaling='None')
         return grd
 
     def _get_binding_site_residues(self):
@@ -299,7 +329,8 @@ def _get_crossminer_pharmacophore(self):
         except:
             raise ImportError("Crossminer is only available to CSD-Discovery")
 
-        feature_definitions = {supported_features[fd.identifier]: fd for fd in Pharmacophore.feature_definitions.values()
+        feature_definitions = {supported_features[fd.identifier]: fd for fd in
+                               Pharmacophore.feature_definitions.values()
                                if fd.identifier in supported_features.keys()}
 
         model_features = []
@@ -381,41 +412,70 @@ def _get_ligands(results):
             for l in entry.filtered_ligands:
                 try:
                     l.rdmol = Chem.MolFromSmiles(l.smiles)
+                    AllChem.Compute2DCoords(l.rdmol)
                     l.rdmol.SetProp("_Name", str(entry.identifier + "/" + l.chemical_id))
-                    l.fingerprint = MACCSkeys.GenMACCSKeys(l.rdmol)
+                    # l.fingerprint = MACCSkeys.GenMACCSKeys(l.rdmol)
+                    l.fingerprint = AllChem.GetMorganFingerprintAsBitVect(l.rdmol, 2)
                     if l.chemical_id not in uniques:
                         ligs.append(l)
                         uniques.append(l.chemical_id)
-                except AttributeError:
+                except:
                     continue
         return ligs
 
     @staticmethod
-    def _cluster_ligands(n, ligands):
+    def _cluster_ligands(ligands, t):
         """
-        !!! OUT OF DATE !!!
-
 
-        Use clustering in cluster_img.py (Not public yet)
         :return:
         """
-        cluster_dict = {a: [] for a in range(int(n))}
-        num = len(ligands)
-        sim = np.zeros((num, num))
-        for i in range(num):
-            for j in range(num):
-                sim[i, j] = DataStructs.FingerprintSimilarity(ligands[i].fingerprint,
-                                                              ligands[j].fingerprint)
+        def fingerprint_array(ligands):
+            X =[]
+            for l in ligands:
+                arr = np.zeros((0,))
+                DataStructs.ConvertToNumpyArray(l.fingerprint, arr)
+                X.append(arr)
+            return X
+
+        cluster_dic = {}
+
+        # generate fingerprint array
+        X = fingerprint_array(ligands)
+        if len(X) < 2:
+            X = fingerprint_array(ligands)
+            if len(X) < 2:
+                raise ValueError("Fingerprint array must contain more than 1 entry")
 
-        kmeans_model = KMeans(n_clusters=n, random_state=1).fit(sim)
-        labels = kmeans_model.labels_
-        s = silhouette_score(sim, labels, metric='euclidean')
+        # dimensionality reduction
+        tsne_X = TSNE(n_components=3, metric=tanimoto_dist).fit_transform(np.array(X, dtype=np.float32))
+
+        # clustering
+        cluster_tsne = hdbscan.HDBSCAN(min_cluster_size=2, gen_min_span_tree=True)
+        cluster_tsne.fit(tsne_X)
+
+        for i, label in enumerate(cluster_tsne.labels_):
+            if label == -1:
+                continue
+            else:
+                if label in cluster_dic:
+                    cluster_dic[label].append(ligands[i])
+                else:
+                    cluster_dic.update({label: [ligands[i]]})
 
-        for i, ident in enumerate(kmeans_model.labels_):
-            ligands[i].cluster_id = ident
-            cluster_dict[int(ident)].append(ligands[i])
+        x = [tsne_X.T[0][j] for j, l in enumerate(cluster_tsne.labels_) if l != -1]
+        y = [tsne_X.T[1][j] for j, l in enumerate(cluster_tsne.labels_) if l != -1]
+        hue = [l for j, l in enumerate(cluster_tsne.labels_) if l != -1]
 
-        return cluster_dict, s
+        plt.scatter(x, y, c=hue, cmap='RdBu')
+
+        plt.title("{} clusters".format(t))
+        plt.savefig("{}.png".format(t))
+        plt.close()
+        if len(cluster_dic) == 0:
+            print("NO CLUSTERS FOUND")
+            cluster_dic = {i: [ligands[i]] for i in range(0, len(ligands))}
+
+        return cluster_dic
 
     @staticmethod
     def _align_proteins(reference, reference_chain, targets):
@@ -438,6 +498,7 @@ def _align_proteins(reference, reference_chain, targets):
             prot = Protein.from_file(t.fname)
             prot.detect_ligand_bonds()
             prot.add_hydrogens()
+            chain = None
             for l in prot.ligands:
                 if str(t.clustered_ligand) == str(l.identifier.split(":")[1][0:3]):
                     try:
@@ -446,14 +507,11 @@ def _align_proteins(reference, reference_chain, targets):
                                                              distance=6)
                         chain = bs.residues[0].identifier.split(":")[0]
                     except:
-                        break
-                    break
+                        chain = None
 
-                else:
-                    continue
-            if not chain:
+            if chain is None:
                 print("\n        {} failed! No chain detected".format(t.identifier))
-                break
+                continue
             try:
                 binding_site_superposition = Protein.ChainSuperposition()
                 (bs_rmsd, bs_transformation) = binding_site_superposition.superpose(reference[reference_chain],
@@ -463,7 +521,7 @@ def _align_proteins(reference, reference_chain, targets):
                     if str(t.clustered_ligand) == str(lig.identifier.split(":")[1][0:3]):
                         if chain == str(lig.identifier.split(":")[0]):
                             aligned_ligands.append(lig)
-            except IndexError:
+            except:
                 print("\n        {} failed!".format(t.identifier))
                 continue
 
@@ -498,12 +556,12 @@ def write(self, fname):
 
                 for feature in self._features:
                     line += "{0},{1},{2},{3},{4},{5}".format(self.identifier,
-                                                            feature.feature_type,
-                                                            feature.feature_coordinates.x,
-                                                            feature.feature_coordinates.y,
-                                                            feature.feature_coordinates.z,
-                                                            feature.score_value
-                                                            )
+                                                             feature.feature_type,
+                                                             feature.feature_coordinates.x,
+                                                             feature.feature_coordinates.y,
+                                                             feature.feature_coordinates.z,
+                                                             feature.score_value
+                                                             )
                     if feature.projected_coordinates:
                         line += ",{0},{1},{2}".format(feature.projected_coordinates.x,
                                                       feature.projected_coordinates.y,
@@ -532,7 +590,7 @@ def write(self, fname):
                 pymol_file.write(pymol_out)
 
             label = self.get_label(self)
-            with io.MoleculeWriter(join(dirname(fname),lfile)) as writer:
+            with io.MoleculeWriter(join(dirname(fname), lfile)) as writer:
                 writer.write(label)
 
         elif extension == ".json":
@@ -582,7 +640,7 @@ def write(self, fname):
                 pharmit_file.write(json.dumps({"points": pts}))
 
         elif extension == ".mol2":
-            mol = Molecule(identifier = "pharmacophore_model")
+            mol = Molecule(identifier="pharmacophore_model")
             atom_dic = {"apolar": 'C',
                         "donor": 'N',
                         "acceptor": 'O',
@@ -595,7 +653,7 @@ def write(self, fname):
                                 label=str(feat.score_value))
                            for feat in self.features]
 
-            for a in  pseudo_atms:
+            for a in pseudo_atms:
                 mol.add_atom(a)
 
             with io.MoleculeWriter(fname) as w:
@@ -664,8 +722,9 @@ def _from_file(fname, protein=None, identifier=None, settings=None):
 
         with open(fname) as f:
             file = f.read().split("FEATURE_LIBRARY_END")[1]
-            lines = [l for l in file.split("""\n\n""") if l != ""]
-            feature_list = [f for f in [_PharmacophoreFeature.from_crossminer(feature) for feature in lines] if f != None]
+            lines = [l for l in file.split("""\r\n\r\n""") if l != ""]
+            feature_list = [f for f in [_PharmacophoreFeature.from_crossminer(feature) for feature in lines] if
+                            f != None]
 
         return PharmacophoreModel(settings,
                                   identifier=identifier,
@@ -731,7 +790,7 @@ def from_ligands(ligands, identifier, protein=None, settings=None):
             all_feats = [f for l in detected for f in l]
 
             if not all_feats:
-                    continue
+                continue
 
             for f in all_feats:
                 feature_dic[cm_dic[fd.identifier]].set_sphere(f.spheres[0].centre, f.spheres[0].radius, 1)
@@ -752,7 +811,8 @@ def from_ligands(ligands, identifier, protein=None, settings=None):
                                                       feature_type=feat,
                                                       feature_coordinates=coords,
                                                       projected_coordinates=projected_coordinates,
-                                                      score_value=feature_grd.value_at_coordinate(coords, position=False),
+                                                      score_value=feature_grd.value_at_coordinate(coords,
+                                                                                                  position=False),
                                                       vector=None,
                                                       settings=settings
                                                       )
@@ -763,6 +823,7 @@ def from_ligands(ligands, identifier, protein=None, settings=None):
                                   features=features,
                                   protein=protein,
                                   dic=feature_dic)
+
     @staticmethod
     def _to_file(ligands, out_dir, fname="ligands.dat"):
         with open(os.path.join(out_dir, fname), "w") as f:
@@ -770,7 +831,7 @@ def _to_file(ligands, out_dir, fname="ligands.dat"):
                 f.write("{},{},{}\n".format(l.structure_id, l.chemical_id, l.smiles))
 
     @staticmethod
-    def from_pdb(pdb_code, chain, out_dir=None, representatives=None, identifier="LigandBasedPharmacophore"):
+    def from_pdb(pdb_code, chain, representatives=None, identifier="LigandBasedPharmacophore"):
         """
         creates a Pharmacophore Model from a PDB code.
 
@@ -808,7 +869,7 @@ def from_pdb(pdb_code, chain, out_dir=None, representatives=None, identifier="Li
         temp = tempfile.mkdtemp()
         ref = PDBResult(pdb_code)
         ref.download(out_dir=temp, compressed=False)
-
+        all_ligands = None
         if representatives:
             print("Reading representative PDB codes ...")
             reps = []
@@ -822,15 +883,10 @@ def from_pdb(pdb_code, chain, out_dir=None, representatives=None, identifier="Li
         else:
             accession_id = PDBResult(pdb_code).protein.sub_units[0].accession_id
             results = PharmacophoreModel._run_query(accession_id)
-            ligands = PharmacophoreModel._get_ligands(results)
+            # return all_ligands
+            all_ligands = PharmacophoreModel._get_ligands(results)
 
-            top = os.path.dirname(os.path.dirname(out_dir))
-            PharmacophoreModel._to_file(ligands, top, fname="all_ligands.dat")
-
-            k = int(round(len(ligands) / 5))
-            if k < 2:
-                k = 2
-            cluster_dict, s = PharmacophoreModel._cluster_ligands(n=k, ligands=ligands)
+            cluster_dict = PharmacophoreModel._cluster_ligands(ligands=all_ligands, t=identifier)
             reps = [l[0] for l in cluster_dict.values() if len(l) != 0]
 
         targets = []
@@ -854,21 +910,12 @@ def from_pdb(pdb_code, chain, out_dir=None, representatives=None, identifier="Li
                                                             reference_chain=chain,
                                                             targets=targets)
 
-        try:
-            with open(os.path.join(top, "silhouette.dat"), "w") as sfile:
-                sfile.write("{}".format(s))
-            PharmacophoreModel._to_file(reps, top, fname="representatives.dat")
-
-        except:
-            pass
-
-        if out_dir:
-            with io.MoleculeWriter(os.path.join(out_dir, "aligned_mols.mol2")) as w:
-                for l in ligands:
-                    w.write(l)
+        p = PharmacophoreModel.from_ligands(ligands=ligands, identifier=identifier)
+        p.all_ligands = all_ligands
+        p.representatives = reps
+        p.aligned_ligands = ligands
 
-        return PharmacophoreModel.from_ligands(ligands=ligands,
-                                               identifier=identifier)
+        return p
 
 
 class _PharmacophoreFeature(Helper):
@@ -884,7 +931,9 @@ class _PharmacophoreFeature(Helper):
     :param settings:
 
     """
-    def __init__(self, projected, feature_type, feature_coordinates, projected_coordinates, score_value, vector, settings):
+
+    def __init__(self, projected, feature_type, feature_coordinates, projected_coordinates, score_value, vector,
+                 settings):
         self._projected = projected
         self._feature_type = feature_type
         self._feature_coordinates = feature_coordinates
@@ -921,7 +970,6 @@ def score_value(self):
     def vector(self):
         return self._vector
 
-
     @staticmethod
     def from_hotspot(grid, probe, protein, settings):
         """
@@ -986,13 +1034,14 @@ def from_crossminer(feature_str):
         vector = None
         settings = PharmacophoreModel.Settings()
 
-        feat = re.search(r"""PHARMACOPHORE_FEATURE (.+?)\n""", feature_str)
+        feat = re.search(r"""PHARMACOPHORE_FEATURE (.+?)\r""", feature_str)
         if feat.group(1) == "excluded_volume":
             pass
         else:
-            feature_type =  cm_feature_dict[feat.group(1)]
+            feature_type = cm_feature_dict[feat.group(1)]
 
-            spher = re.findall("""PHARMACOPHORE_SPHERE (.+?)\n""", feature_str)
+            spher = re.findall("""PHARMACOPHORE_SPHERE (.+?)\r""", feature_str)
+            print(spher)
 
             if len(spher) == 1:
                 coords = spher[0].split(" ")
@@ -1003,7 +1052,7 @@ def from_crossminer(feature_str):
 
             elif len(spher) == 2:
                 coords = spher[0].split(" ")
-                proj =  spher[1].split(" ")
+                proj = spher[1].split(" ")
                 feature_coordinates = Coordinates(float(coords[0]), float(coords[1]), float(coords[2]))
                 projected = True
                 projected_coordinates = Coordinates(float(proj[0]), float(proj[1]), float(proj[2]))
@@ -1012,7 +1061,8 @@ def from_crossminer(feature_str):
             else:
                 raise IOError("feature format not recognised")
 
-            return _PharmacophoreFeature(projected, feature_type, feature_coordinates, projected_coordinates, score, vector,
+            return _PharmacophoreFeature(projected, feature_type, feature_coordinates, projected_coordinates, score,
+                                         vector,
                                          settings)
 
     @staticmethod
@@ -1044,7 +1094,7 @@ def get_projected_coordinates(feature_type, feature_coordinates, protein, settin
                 if dist in near_atoms.keys():
                     near_atoms[dist].append(atm)
                 else:
-                    near_atoms.update({dist:[atm]})
+                    near_atoms.update({dist: [atm]})
             else:
                 continue
         if len(near_atoms.keys()) == 0:
@@ -1069,9 +1119,9 @@ def get_maxima(grid):
             coords = grid.indices_to_point(indices[0][0], indices[0][1], indices[0][2])
             return max_value, Coordinates(coords[0], coords[1], coords[2])
         else:
-            coords = grid.indices_to_point(round(sum(i[0] for i in indices)/len(indices)),
-                                           round(sum(j[1] for j in indices)/len(indices)),
-                                           round(sum(k[2] for k in indices)/len(indices))
+            coords = grid.indices_to_point(round(sum(i[0] for i in indices) / len(indices)),
+                                           round(sum(j[1] for j in indices) / len(indices)),
+                                           round(sum(k[2] for k in indices) / len(indices))
                                            )
             return max_value, Coordinates(coords[0], coords[1], coords[2])
 
@@ -1099,9 +1149,9 @@ def get_centroid(grid):
                     total_mass += grid_val
 
         coords = Coordinates(np.divide(weighted_x, total_mass),
-                                      np.divide(weighted_y, total_mass),
-                                      np.divide(weighted_z, total_mass)
-                                      )
+                             np.divide(weighted_y, total_mass),
+                             np.divide(weighted_z, total_mass)
+                             )
         score = grid.value_at_point(coords)
 
         return float(score), coords
diff --git a/hotspots/hs_utilities.py b/hotspots/hs_utilities.py
index ea46f1a7..b9a333c0 100644
--- a/hotspots/hs_utilities.py
+++ b/hotspots/hs_utilities.py
@@ -15,8 +15,8 @@
 from os import mkdir
 from os.path import exists, join, abspath
 
-import matplotlib as mpl
-mpl.use('TkAgg')
+# import matplotlib as mpl
+# mpl.use('TkAgg')
 import matplotlib.pyplot as plt
 
 import numpy as np
diff --git a/materials/fragment-hotspot-maps-2019.pdf b/materials/fragment-hotspot-maps-2019.pdf
new file mode 100644
index 00000000..7cc6a8c0
Binary files /dev/null and b/materials/fragment-hotspot-maps-2019.pdf differ
diff --git a/materials/fragment-hotspot-maps-2019.pptx b/materials/fragment-hotspot-maps-2019.pptx
new file mode 100644
index 00000000..8fbc7bd8
Binary files /dev/null and b/materials/fragment-hotspot-maps-2019.pptx differ
diff --git a/materials/methodology.pptx b/materials/methodology.pptx
new file mode 100644
index 00000000..db999635
Binary files /dev/null and b/materials/methodology.pptx differ