MICTI

insilicolife · Jun 20, 2019 · 248db3f · 248db3f
1 parent ba0e27f
commit 248db3f
Show file tree

Hide file tree

Showing 16 changed files with 675 additions and 106 deletions.
diff --git a/MICTI.egg-info/PKG-INFO b/MICTI.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: MICTI
-Version: 0.1.2
+Version: 0.1.3
 Summary: Feature extraction approach in single-cell gene expression profiling for cell-type marker identification.
 Home-page: https://github.com/insilicolife/micti
 Author: Nigatu Ayele
@@ -35,34 +35,27 @@ Description: MICTI- Marker gene Identification for Cell Type Identity
 
         	mictiObject=MARKER.MICTI(datamatrix, geneName, cellName, cluster_assignment=cell_type, k=None, th=0, ensembel=False, organisum="hsapiens")
 
-        2D visualisation with T-SNE:
+        2D visualisation with tSNE:
 
         	mictiObject.get_Visualization(dim=2, method="tsne")
 
         Get MICTI marker genes:
 
-        	cluster_1_markers=mictiObject.get_markers_by_Pvalues_and_Zscore(1, threshold_pvalue=.01,threshold_z_score=0)
-
-        Gene Ontology enrichment analysis for cell-type marker genes in each of cell-type clusters
-
-        	enrechment_table=mictiObject.get_gene_list_over_representation_analysis(list(cluster_1_markers.index))
-        	enrechment_table #gene list enrichment analysis result for the cell-type marker genes ub cluster-1
+                cluster_1_markers=mictiObject.get_markers_by_Pvalues_and_Zscore(1, threshold_pvalue=.01,threshold_z_score=0)
 
-        Creating MICTI object for clustering cells into pre-defined k clusters:
+        Markers heatmap plots:
 
-        	mictiObject_1=MARKER.MICTI(datamatrix.T, geneName, cellName, cluster_assignment=None, th=0, ensembel=False, organisum="hsapiens")
+        	mictiObject.heatMap()
 
-        Cluster cells into k=6 clusters using Gaussian mixture model- method="GM", and k-means - method="kmeans"
+        Markers Radar plots:
 
-        	mictiObject_1.cluster_cells(6, method="GM", maxiter=1000)
+        	mictiObject.get_Radar_plot()
 
-        Get marker genes for cluster-2:
-
-        	cluster2_markers=mictiObject_1.get_markers_by_Pvalues_and_Zscore(2, threshold_pvalue=.01, threshold_z_score=0)
+        Gene Ontology enrichment analysis for the cell-type marker genes in each of the cell-type clusters
 
-        Perform gene list enrichment analysis:
+        	enrechment_table=mictiObject.get_gene_list_over_representation_analysis(list(cluster_1_markers.index))
+        	enrechment_table #gene-list enrichment analysis result for the cell-type marker genes for cluster-1
 
-        	enrechment_table=mictiObject_1.get_gene_list_over_representation_analysis(list(cluster2_markers.index))
 
         Licence
         -------

diff --git a/MICTI.egg-info/SOURCES.txt b/MICTI.egg-info/SOURCES.txt
diff --git a/MICTI.egg-info/dependency_links.txt b/MICTI.egg-info/dependency_links.txt
diff --git a/MICTI.egg-info/requires.txt b/MICTI.egg-info/requires.txt
diff --git a/MICTI.egg-info/top_level.txt b/MICTI.egg-info/top_level.txt
diff --git a/build/lib/MICTI/GeoMinner.py b/build/lib/MICTI/GeoMinner.py
@@ -0,0 +1,116 @@
+import json
+import requests
+import urllib.request
+import gzip
+from bs4 import BeautifulSoup
+class GEOMinner:
+    """Download and parse the MINiML metadata of one GEO series.
+
+    On construction the ``<geoId>_family.xml.tgz`` file of the series is
+    fetched from ``ftp.ncbi.nlm.nih.gov`` and parsed into three attributes:
+
+    * ``series``   -- dict of series-level fields,
+    * ``platform`` -- dict mapping each platform ``iid`` to its fields,
+    * ``samples``  -- dict mapping each sample ``iid`` to its fields.
+    """
+
+    def __init__(self,geoID):
+        """Store the accession string *geoID* and fetch its metadata.
+
+        Performs a network request; any error raised by ``urlopen`` or by
+        the parser propagates to the caller.
+        """
+        self.geoId=geoID
+        self.series,self.platform,self.samples=self.getMetadataFromGEOID()
+
+    @staticmethod
+    def _childrenToDict(element):
+        """Map each direct child's ``name`` to its ``string``.
+
+        Returns an empty dict when *element* is ``None`` so that an
+        absent optional element does not abort the whole parse.
+        """
+        if element is None:
+            return {}
+        return {child.name: child.string for child in element.children}
+
+    def getDownloadLinks(self):
+        """Return the FTP-relative directory of this series.
+
+        The directory stub replaces the last three digits of the accession
+        number with ``nnn`` (``GSE12345`` -> ``geo/series/GSE12nnn/GSE12345/``).
+        Accessions with fewer than three digits keep their full letter
+        prefix (``GSE12`` -> ``geo/series/GSEnnn/GSE12/``).
+        """
+        accession=self.geoId
+        # Split the letter prefix from the numeric part so that short
+        # accession numbers do not lose characters of the prefix.
+        prefix=accession.rstrip("0123456789")
+        digits=accession[len(prefix):]
+        stub=prefix+digits[:-3]+"nnn"
+        return 'geo/series/'+stub+'/'+accession+'/'
+
+    def getSeriesMetadata(self,seriesXML):
+        """Return the fields of a parsed series element as a dict.
+
+        Direct children are collected first; children of the element's
+        ``status`` sub-element are merged on top and win on key clashes.
+        """
+        series_info=self._childrenToDict(seriesXML)
+        series_info.update(self._childrenToDict(seriesXML.status))
+        return series_info
+
+    def getPlatformMetadata(self,platformXML):
+        """Return the fields of a parsed platform element as a dict.
+
+        Direct children are collected first; children of the element's
+        ``status`` sub-element are merged on top and win on key clashes.
+        """
+        platform_info=self._childrenToDict(platformXML)
+        platform_info.update(self._childrenToDict(platformXML.status))
+        return platform_info
+
+    def getSampleMetadata(self,sample):
+        """Return the fields of a parsed sample element as a dict.
+
+        Merge order (later entries override earlier ones): direct children
+        of the sample, children of ``status``, children of ``channel``
+        together with its tagged ``characteristics`` entries, and finally
+        the children of every ``platform-ref`` element.
+        """
+        sample_info=self._childrenToDict(sample)
+        sample_info.update(self._childrenToDict(sample.status))
+
+        channel_node=sample.channel
+        channel=self._childrenToDict(channel_node)
+        if channel_node is not None:
+            for characteristic in channel_node.find_all("characteristics"):
+                tag=characteristic.get("tag")
+                # An untagged entry has no key of its own; it is left to
+                # whatever the generic child pass above stored for it.
+                if tag is not None:
+                    channel[str(tag)]=characteristic.string
+        sample_info.update(channel)
+
+        # The search runs on the sample element itself: the loop variable
+        # of the child pass no longer rebinds the ``sample`` parameter.
+        platforms={}
+        for platform_ref in sample.find_all("platform-ref"):
+            for child in platform_ref:
+                platforms[child.name]=child.string
+        sample_info.update(platforms)
+
+        return sample_info
+
+    def getMetadataFromGEOID(self):
+        """Download the series' MINiML file and parse it.
+
+        Returns a tuple ``(series, platforms, samples)``:
+        ``series`` holds the fields of the last ``series`` element found,
+        ``platforms`` maps platform ``iid`` to its fields (first occurrence
+        of an ``iid`` wins) and ``samples`` maps sample ``iid`` to its
+        fields plus ``series_accsesion`` and, when the matching relation
+        elements exist, ``biosampleLink`` / ``SRALink``.
+        """
+        url="https://ftp.ncbi.nlm.nih.gov/"+self.getDownloadLinks()+"miniml/"+self.geoId+"_"+"family.xml.tgz"
+        # Close the HTTP response as soon as the payload has been read.
+        with urllib.request.urlopen(url) as response:
+            payload=gzip.decompress(response.read())
+        # NOTE(review): the ".tgz" payload is only gunzipped, not untarred;
+        # presumably the lenient html5lib parser copes with the remaining
+        # archive framing -- confirm, or extract the XML member explicitly.
+        data=BeautifulSoup(payload,"html5lib")
+
+        samples={}
+        for sample in data.find_all("sample"):
+            info=self.getSampleMetadata(sample)
+            info["series_accsesion"]=self.geoId
+            for relation in sample.find_all("relation"):
+                target=relation.get("target")
+                if target is None:
+                    # Nothing to link to; skip instead of failing on None+str.
+                    continue
+                relation_type=relation.get("type")
+                if relation_type=="BioSample":
+                    info["biosampleLink"]=target+"?report=full&format=text"
+                elif relation_type=="SRA":
+                    info["SRALink"]=target+"&report=FullXml"
+            samples[sample.get("iid")]=info
+
+        series={}
+        for serie in data.find_all("series"):
+            series=self.getSeriesMetadata(serie)
+
+        platforms={}
+        for platform in data.find_all("platform"):
+            platform_id=platform.get("iid")
+            if platform_id not in platforms:
+                platforms[platform_id]=self.getPlatformMetadata(platform)
+
+        return series, platforms, samples
+
+    def getSamples(self):
+        """Return the dict of per-sample metadata."""
+        return self.samples
+
+    def getPlatform(self):
+        """Return the dict of per-platform metadata."""
+        return self.platform
+
+    def getSeries(self):
+        """Return the dict of series-level metadata."""
+        return self.series
diff --git a/build/lib/MICTI/HdpModel.py b/build/lib/MICTI/HdpModel.py
diff --git a/build/lib/MICTI/MARKER.py b/build/lib/MICTI/MARKER.py
@@ -2,9 +2,11 @@
 import numpy as np
 from scipy.sparse import csr_matrix, isspmatrix
 from MICTI import MARKERS
+from MICTI import normalize
+from MICTI import GeoMinner
 import sys
 
-def MICTI(sparceMatrix,geneNames,cellNames,k=None,cluster_assignment=None, th=0,normalized=True, ensembel=False, organisum="hsapiens"):
+def MICTI(sparceMatrix,geneNames,cellNames,k=None,cluster_assignment=None, th=0,normalized=True, UMI=False, ensembel=False, organisum="hsapiens"):
     #check sparcity of the matrix
     if(sparceMatrix.shape[0]!=len(cellNames)):
         print("The number of cells and the given cell names does not match")
@@ -33,10 +35,15 @@ def MICTI(sparceMatrix,geneNames,cellNames,k=None,cluster_assignment=None, th=0,
                 sparceMatrix=csr_matrix(sparceMatrix)
         else:
             if not isspmatrix(sparceMatrix):
-                sparceMatrix=normalizeUMIWithscalefactor(sparceMatrix)
+                if(UMI):
+                    sparceMatrix=normalize.normalizeUMIWithscalefactor(sparceMatrix)
+                else:
+                    sparceMatrix,geneNames=normalize.getTPM(sparceMatrix.T,gene_Names=geneNames,ensembol_gene=ensembel)
+                    #print(sparceMatrix.shape)
+
                 sparceMatrix=csr_matrix(sparceMatrix)
             else:
-                sparceMatrix=normalizeUMIWithscalefactor(sparceMatrix.toarray())
+                sparceMatrix=normalize.normalizeUMIWithscalefactor(sparceMatrix)
                 sparceMatrix=csr_matrix(sparceMatrix)
         #creat micti object
         micti_obj=MARKERS.MICTI(sparceMatrix,geneNames,cellNames,k=kk,cluster_label=cluster_labels,cluster_assignment=labelArray, th=th, ensembel=ensembel, organisum=organisum)