Skip to content

Commit

Permalink
MICTI
Browse files Browse the repository at this point in the history
  • Loading branch information
insilicolife committed Jun 20, 2019
1 parent ba0e27f commit 248db3f
Show file tree
Hide file tree
Showing 16 changed files with 675 additions and 106 deletions.
27 changes: 10 additions & 17 deletions MICTI.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: MICTI
Version: 0.1.2
Version: 0.1.3
Summary: Feature extraction approach in single-cell gene expression profiling for cell-type marker identification.
Home-page: https://github.com/insilicolife/micti
Author: Nigatu Ayele
Expand Down Expand Up @@ -35,34 +35,27 @@ Description: MICTI- Marker gene Identification for Cell Type Identity

mictiObject=MARKER.MICTI(datamatrix, geneName, cellName, cluster_assignment=cell_type, k=None, th=0, ensembel=False, organisum="hsapiens")

2D visualisation with T-SNE:
2D visualisation with tSNE:

mictiObject.get_Visualization(dim=2, method="tsne")

Get MICTI marker genes:

cluster_1_markers=mictiObject.get_markers_by_Pvalues_and_Zscore(1, threshold_pvalue=.01,threshold_z_score=0)

Gene Ontology enrichment analysis for the cell-type marker genes of each cell-type cluster:

enrechment_table=mictiObject.get_gene_list_over_representation_analysis(list(cluster_1_markers.index))
enrechment_table #gene list enrichment analysis result for the cell-type marker genes in cluster-1
cluster_1_markers=mictiObject.get_markers_by_Pvalues_and_Zscore(1, threshold_pvalue=.01,threshold_z_score=0)

Creating MICTI object for clustering cells into pre-defined k clusters:
Markers heatmap plots:

mictiObject_1=MARKER.MICTI(datamatrix.T, geneName, cellName, cluster_assignment=None, th=0, ensembel=False, organisum="hsapiens")
mictiObject.heatMap()

Cluster cells into k=6 clusters using either a Gaussian mixture model (method="GM") or k-means (method="kmeans"):
Markers Radar plots:

mictiObject_1.cluster_cells(6, method="GM", maxiter=1000)
mictiObject.get_Radar_plot()

Get marker genes for cluster-2:

cluster2_markers=mictiObject_1.get_markers_by_Pvalues_and_Zscore(2, threshold_pvalue=.01, threshold_z_score=0)
Gene Ontology enrichment analysis for the cell-type marker genes of each cell-type cluster:

Perform gene list enrichment analysis:
enrechment_table=mictiObject.get_gene_list_over_representation_analysis(list(cluster_1_markers.index))
enrechment_table #gene-list enrichment analysis result for the cell-type marker genes for cluster-1

enrechment_table=mictiObject_1.get_gene_list_over_representation_analysis(list(cluster2_markers.index))

Licence
-------
Expand Down
20 changes: 17 additions & 3 deletions MICTI.egg-info/SOURCES.txt
Git LFS file not shown
4 changes: 1 addition & 3 deletions MICTI.egg-info/dependency_links.txt
Git LFS file not shown
19 changes: 16 additions & 3 deletions MICTI.egg-info/requires.txt
Git LFS file not shown
4 changes: 1 addition & 3 deletions MICTI.egg-info/top_level.txt
Git LFS file not shown
116 changes: 116 additions & 0 deletions build/lib/MICTI/GeoMinner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import json
import requests
import urllib.request
import gzip
from bs4 import BeautifulSoup
class GEOMinner:
    """Fetch and parse the MINiML family metadata of one GEO accession.

    Constructing an instance downloads the ``<geoID>_family.xml.tgz``
    archive from ``ftp.ncbi.nlm.nih.gov`` and splits it into series,
    platform and sample metadata, which are then available through
    ``getSeries()``, ``getPlatform()`` and ``getSamples()``.
    """

    def __init__(self, geoID):
        """Store ``geoID`` and immediately download/parse its metadata."""
        self.geoId = geoID
        self.series, self.platform, self.samples = self.getMetadataFromGEOID()

    @staticmethod
    def _childFields(node):
        """Map each direct child's ``name`` to its ``string`` value.

        Every child yielded by ``node.children`` is included, so later
        children overwrite earlier ones that share the same name.
        """
        return {child.name: child.string for child in node.children}

    def getDownloadLinks(self):
        """Return the relative FTP directory for this accession.

        The result has the form ``geo/series/<range>/<accession>/`` where
        ``<range>`` is the accession with its last three digits replaced by
        ``nnn``.  Only the numeric tail is shortened, so an accession with
        three or fewer digits (e.g. ``GSE1``) maps to ``GSEnnn`` instead of
        losing part of its letter prefix.
        """
        accession = str(self.geoId)
        # Split "GSE12345" into the letter prefix and the trailing digits.
        digits_start = len(accession.rstrip("0123456789"))
        prefix = accession[:digits_start]
        digits = accession[digits_start:]
        range_dir = prefix + digits[:-3] + "nnn"
        return 'geo/series/' + range_dir + '/' + accession + '/'

    def getSeriesMetadata(self, seriesXML):
        """Flatten a series node into a ``{field name: text}`` dict.

        Direct children of the node are collected first, then the children
        of its ``status`` element are merged on top.
        """
        series_info = self._childFields(seriesXML)
        series_info.update(self._childFields(seriesXML.status))
        return series_info

    def getPlatformMetadata(self, platformXML):
        """Flatten a platform node into a ``{field name: text}`` dict.

        Direct children of the node are collected first, then the children
        of its ``status`` element are merged on top.
        """
        platform_info = self._childFields(platformXML)
        platform_info.update(self._childFields(platformXML.status))
        return platform_info

    def getSampleMetadata(self, sample):
        """Flatten a sample node into a ``{field name: text}`` dict.

        Fields are merged in this order, later sources overriding earlier
        ones: direct children of the sample, children of ``status``,
        children of ``channel`` (plus one entry per ``characteristics``
        element keyed by its ``tag`` attribute), and finally the children
        of every ``platform-ref`` element.

        Raises:
            KeyError: if a ``characteristics`` element has no ``tag``
                attribute.
        """
        sample_status = self._childFields(sample.status)

        channel = self._childFields(sample.channel)
        for characteristic in sample.channel.find_all("characteristics"):
            channel[str(characteristic.attrs["tag"])] = characteristic.string

        sample_info = self._childFields(sample)

        # Search the sample node itself.  The loop variable used to collect
        # the children previously shadowed ``sample``, so this lookup ran on
        # the sample's last child instead.
        platforms = {}
        for platform_ref in sample.find_all("platform-ref"):
            for child in platform_ref:
                platforms[child.name] = child.string

        sample_info.update(sample_status)
        sample_info.update(channel)
        sample_info.update(platforms)
        return sample_info

    def getMetadataFromGEOID(self):
        """Download the family archive and parse it.

        Returns:
            A ``(series, platforms, samples)`` tuple where ``series`` is the
            metadata dict of the last ``series`` element found, ``platforms``
            maps each platform ``iid`` to its metadata dict (first
            occurrence wins) and ``samples`` maps each sample ``iid`` to its
            metadata dict, extended with ``series_accsesion`` and, when
            present, ``biosampleLink`` / ``SRALink``.
        """
        url = (
            "https://ftp.ncbi.nlm.nih.gov/" + self.getDownloadLinks()
            + "miniml/" + self.geoId + "_" + "family.xml.tgz"
        )
        # Close the HTTP response as soon as the payload has been read.
        with urllib.request.urlopen(url) as response:
            payload = gzip.decompress(response.read())
        # NOTE(review): the file is a .tgz, so the decompressed payload
        # presumably still carries tar framing around the XML; this relies
        # on the html5lib parser tolerating it -- confirm.
        data = BeautifulSoup(payload, "html5lib")

        samples = {}
        for sample in data.find_all("sample"):
            record = self.getSampleMetadata(sample)
            record["series_accsesion"] = self.geoId
            for relation in sample.find_all("relation"):
                relation_type = relation.get("type")
                if relation_type == "BioSample":
                    record["biosampleLink"] = relation.get("target") + "?report=full&format=text"
                elif relation_type == "SRA":
                    record["SRALink"] = relation.get("target") + "&report=FullXml"
            samples[sample.get("iid")] = record

        # Only one series dict is kept; a later element replaces an earlier one.
        series = {}
        for serie in data.find_all("series"):
            series = self.getSeriesMetadata(serie)

        platforms = {}
        for platform in data.find_all("platform"):
            platform_id = platform.get("iid")
            if platform_id not in platforms:
                platforms[platform_id] = self.getPlatformMetadata(platform)

        return series, platforms, samples

    def getSamples(self):
        """Return the ``{sample iid: metadata dict}`` mapping."""
        return self.samples

    def getPlatform(self):
        """Return the ``{platform iid: metadata dict}`` mapping."""
        return self.platform

    def getSeries(self):
        """Return the series metadata dict."""
        return self.series
Empty file added build/lib/MICTI/HdpModel.py
Empty file.
13 changes: 10 additions & 3 deletions build/lib/MICTI/MARKER.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix
from MICTI import MARKERS
from MICTI import normalize
from MICTI import GeoMinner
import sys

def MICTI(sparceMatrix,geneNames,cellNames,k=None,cluster_assignment=None, th=0,normalized=True, ensembel=False, organisum="hsapiens"):
def MICTI(sparceMatrix,geneNames,cellNames,k=None,cluster_assignment=None, th=0,normalized=True, UMI=False, ensembel=False, organisum="hsapiens"):
#check sparcity of the matrix
if(sparceMatrix.shape[0]!=len(cellNames)):
print("The number of cells and the given cell names does not match")
Expand Down Expand Up @@ -33,10 +35,15 @@ def MICTI(sparceMatrix,geneNames,cellNames,k=None,cluster_assignment=None, th=0,
sparceMatrix=csr_matrix(sparceMatrix)
else:
if not isspmatrix(sparceMatrix):
sparceMatrix=normalizeUMIWithscalefactor(sparceMatrix)
if(UMI):
sparceMatrix=normalize.normalizeUMIWithscalefactor(sparceMatrix)
else:
sparceMatrix,geneNames=normalize.getTPM(sparceMatrix.T,gene_Names=geneNames,ensembol_gene=ensembel)
#print(sparceMatrix.shape)

sparceMatrix=csr_matrix(sparceMatrix)
else:
sparceMatrix=normalizeUMIWithscalefactor(sparceMatrix.toarray())
sparceMatrix=normalize.normalizeUMIWithscalefactor(sparceMatrix)
sparceMatrix=csr_matrix(sparceMatrix)
#creat micti object
micti_obj=MARKERS.MICTI(sparceMatrix,geneNames,cellNames,k=kk,cluster_label=cluster_labels,cluster_assignment=labelArray, th=th, ensembel=ensembel, organisum=organisum)
Expand Down
Loading

0 comments on commit 248db3f

Please sign in to comment.