SENIC的使⽤
软件介绍
SENIC是⼀种同时重建基因调控⽹络并从单细胞RNA-q数据中鉴定stablecellstates的⼯具。基于共表达和DNA模基序(motif)分析推断基因调控⽹络,然后在每个
细胞中分析⽹络活性以鉴定细胞状态
输⼊:SCENIC需要输⼊的是单细胞RNA-q表达矩阵——每列对应于样品(细胞),每⾏对应⼀个基因。基因ID应该是gene-symbol并存储为rownames(尤其是基因
名字部分是为了与RcisTarget数据库兼容);表达数据是Gene的readscount。根据作者的测试,提供原始的或NormalizedUMIcount,⽆论是否log转换,或使⽤TPM
值,结果相差不⼤。
软件的安装
if(!requireNamespace("BiocManager",quietly=TRUE))es("BiocManager")
BiocManager::install(c("AUCell","RcisTarget"))
BiocManager::install(c("GENIE3"))
BiocManager::install(c("zoo","mixtools","rbokeh"))
BiocManager::install(c("DT","NMF","pheatmap","R2HTML","Rtsne"))
BiocManager::install(c("doMC","doRNG"))
BiocManager::install(c("SingleCellExperiment"))
if(!requireNamespace("devtools",quietly=TRUE))es("devtools")
devtools::install_github("aertslab/SCopeLoomR",build_vignettes=TRUE)
if(!requireNamespace("devtools",quietly=TRUE))es("devtools")
devtools::install_github("aertslab/SCENIC")
packageVersion("SCENIC")
下载评分数据库
ForHuman,Mou,Fly
mm_url="/cistarget/databas/mus_musculus/mm9/refq_r45/mc9nr/gene_bad/r"
mm_url2="/cistarget/databas/mus_musculus/mm9/refq_r45/mc9nr/gene_bad/r"
hg_url="/cistarget/databas/homo_sapiens/hg19/refq_r45/mc9nr/gene_bad/r"
hg_url2="/cistarget/databas/homo_sapiens/hg19/refq_r45/mc9nr/gene_bad/r"
fly_url="/cistarget/databas/drosophila_melanogaster/dm6/flyba_r6.02/mc8nr/gene_bad/r"
wget-c$mm_url
wget-c$mm_url2
wget-c$hg_url
wget-c$hg_url2
wget-c$fly_url
不同数据格式的读⼊
对于loom⽂件
("/clone/Previously%20Published/","")
loomPath<-""
10x的输出⽂件
singleCellMatrix<-Seurat::Read10X(="data/pbmc3k/filtered_gene_bc_matrices/hg19/")
cellInfo<-(uratCluster=Idents(uratObject))
Robjects(,SingleCellExperiment)
sce<-load_as_sce(dataPath)#anySingleCellExperimentobject
exprMat<-counts(sce)
cellInfo<-colData(sce)
简单的SENIC⼯作流程
twd("/media/sdb/project/20200223/SCENIC_MouBrain")
loomPath<-(package="SCENIC","examples/mouBrain_")
library(SCopeLoomR)
loom<-open_loom(loomPath)
exprMat<-get_dgem(loom)
cellInfo<-get_cellAnnotation(loom)
clo_loom(loom)
#查看矩阵⼤⼩
#dim(exprMat)
library(SCENIC)
#scenicOptions<-initializeScenic(org="mgi",dbDir="cisTarget_databas",nCores=10)
scenicOptions<-initializeScenic(org="mgi",dbDir="/media/sdb/project/20200223/data",nCores=10)
saveRDS(scenicOptions,file="int/")
###Co-expressionnetwork
genesKept<-geneFiltering(exprMat,scenicOptions)
exprMat_filtered<-exprMat[genesKept,]
runCorrelation(exprMat_filtered,scenicOptions)
exprMat_filtered_log<-log2(exprMat_filtered+1)
runGenie3(exprMat_filtered_log,scenicOptions)
###BuildandscoretheGRN
exprMat_log<-log2(exprMat+1)
scenicOptions@ttings$dbs<-scenicOptions@ttings$dbs["10kb"]#Toyrunttings
runSCENIC_1_coexNetwork2modules(scenicOptions)
runSCENIC_2_createRegulons(scenicOptions,coexMethod=c("top5perTarget"))#Toyrunttings
runSCENIC_3_scoreCells(scenicOptions,exprMat_log)
#Export:运⾏这个时可能报错
#export2scope(scenicOptions,exprMat)
#Binarizeactivity?
#aucellApp<-plotTsne_AUCellApp(scenicOptions,exprMat_log)
#savedSelections<-shiny::runApp(aucellApp)
#newThresholds<-savedSelections$thresholds
#scenicOptions@fileNames$int["aucell_thresholds",1]<-"int/"
#saveRDS(newThresholds,file=getIntName(scenicOptions,"aucell_thresholds"))
#saveRDS(scenicOptions,file="int/")
runSCENIC_4_aucell_binarize(scenicOptions)
###Exploringoutput
#Checkfilesinfolder'output'
#.loomfile@
#output/Step2_MotifEnrichment_detail/subt:
motifEnrichment_lfMotifs_wGenes<-loadInt(scenicOptions,"motifEnrichment_lfMotifs_wGenes")
tableSubt<-motifEnrichment_lfMotifs_wGenes[highlightedTFs=="Sox8"]
viewMotifs(tableSubt)
#output/Step2_etail:
regulonTargetsInfo<-loadInt(scenicOptions,"regulonTargetsInfo")
tableSubt<-regulonTargetsInfo[TF=="Stat6"&highConfAnnot==TRUE]
viewMotifs(tableSubt)
运⾏SENIC
建⽴基因调控⽹络(GeneRegulationNetwork,GRN):
1.基于共表达识别每个转录因⼦TF的潜在靶标。过滤表达矩阵并运⾏GENIE3或者GRNBoost,它们是利⽤表达矩阵推断基因调控⽹络的⼀种算法,能得到转录因⼦
和潜在靶标的相关性⽹络;将⽬标从GENIE3或者GRNBoost格式转为共表达模块。
2.根据DNA模序分析(motif)选择潜在的直接结合靶标(调节因⼦)(利⽤RcisTarget包:TF基序分析)
确定细胞状态及其调节因⼦:
3.分析每个细胞中的⽹络活性(AUCell)在细胞中评分调节⼦(计算AUC)
SCENIC完整流程
导⼊数据
loomPath<-(package="SCENIC","examples/mouBrain_")
library(SCopeLoomR)
loom<-open_loom(loomPath)#mode='r'如果报错可以加上
exprMat<-get_dgem(loom)
cellInfo<-get_cellAnnotation(loom)
clo_loom(loom)
Initializettings初始设置,导⼊评分数据库
library(SCENIC)
#先下载数据库,org⽤来选择物种,这⾥选择的是⼩⿏
scenicOptions<-initializeScenic(org="mgi",dbDir="cisTarget_databas",nCores=10)
#scenicOptions@inputDatatInfo$cellInfo<-"int/"
saveRDS(scenicOptions,file="int/")
共表达⽹络
根据已有的表达矩阵推断潜在的转录因⼦靶标,使⽤GENIE3或GRNBoost。⾸先需要进⾏基因的过滤。
genesKept<-geneFiltering(exprMat,scenicOptions=scenicOptions,
minCountsPerGene=3*.01*ncol(exprMat),
minSamples=ncol(exprMat)*.01)
过滤表达矩阵,保留只有过滤后的基因
exprMat_filtered<-exprMat[genesKept,]
计算相关性,这⼀步时间会⽐较长
runCorrelation(exprMat_filtered,scenicOptions)
exprMat_filtered_log<-log2(exprMat_filtered+1)
runGenie3(exprMat_filtered_log,scenicOptions)
BuildandscoretheGRN构建并给基因调控⽹络(GRN)打分
exprMat_log<-log2(exprMat+1)
scenicOptions@ttings$dbs<-scenicOptions@ttings$dbs["10kb"]#Toyrunttings
runSCENIC_1_coexNetwork2modules(scenicOptions)
runSCENIC_2_createRegulons(scenicOptions,coexMethod=c("top5perTarget"))#Toyrunttings
runSCENIC_3_scoreCells(scenicOptions,exprMat_log)
输⼊表达矩阵
在本教程中,我们提供了⼀个⽰例,样本是⼩⿏⼤脑的200个细胞和862个基因:
loomPath<-(package="SCENIC","examples/mouBrain_")
打开loom⽂件并加载表达矩阵;
library(SCopeLoomR)
loom<-open_loom(loomPath,mode="r")
exprMat<-get_dgem(loom)
cellInfo<-get_cellAnnotation(loom)
clo_loom(loom)
#dim(exprMat)
细胞信息/表型
#cellInfo$nGene<-colSums(exprMat>0)
head(cellInfo)
cellInfo<-(cellInfo)
cellTypeColumn<-"Class"
colnames(cellInfo)[which(colnames(cellInfo)==cellTypeColumn)]<-"CellType"
cbind(table(cellInfo$CellType))
saveRDS(cellInfo,file="int/")
#Colortoassigntothevariables(sameformatasforNMF::aheatmap)
colVars<-list(CellType=c("microglia"="forestgreen",
"endothelial-mural"="darkorange",
"astrocytes_ependymal"="magenta4",
"oligodendrocytes"="hotpink",
"interneurons"="red3",
"pyramidalCA1"="skyblue",
"pyramidaSS"="darkblue"
))
colVars$CellType<-colVars$CellType[interct(names(colVars$CellType),cellInfo$CellType)]
saveRDS(colVars,file="int/")
()
legend(0,1,fill=colVars$CellType,legend=names(colVars$CellType))
初始化SCENIC设置
为了在SCENIC的多个步骤中保持设置⼀致,SCENIC包中的⼤多数函数使⽤⼀个公共对象,该对象存储当前运⾏的选项并代替⼤多数函数的“参数”。⽐如下⾯
的org,dbDir等,可以在开始就将物种rog(mgi——mou,hgnc——human,dmel——fly)和RcisTarge数据库位置分别读给对象org,dbDir,之后统⼀⽤函
数initializeScenic得到对象scenicOptions。具体参数设置可以⽤?initializeScenichelp⼀下。
library(SCENIC)
org="mgi"#orhgnc,or
dmeldbDir="cisTarget_databas"#RcisTargetdatabaslocation
myDatatTitle="SCENICexampleonMoubrain"#chooanameforyouranalysis
data(defaultDbNames)
dbs<-defaultDbNames[[org]]
scenicOptions<-initializeScenic(org=org,dbDir=dbDir,dbs=dbs,datatTitle=myDatatTitle,nCores=10)
#如果有需要就修改这个地⽅
scenicOptions@inputDatatInfo$cellInfo<-"int/
"scenicOptions@inputDatatInfo$colVars<-"int/"
#Databas:
#scenicOptions@ttings$dbs<-c("mm9-5kb-mc8nr"="r")
#scenicOptions@ttings$db_mcVersion<-"v8"
#Savetouatalatertime...
saveRDS(scenicOptions,file="int/")
共表达⽹络
SCENIC⼯作流程的第⼀步是根据表达数据推断潜在的转录因⼦靶标。为此,我们使⽤GENIE3或GRNBoost,输⼊⽂件是表达矩阵(过滤后的)和转录因⼦列表。
GENIE3/GRBBoost的输出结果和相关矩阵将⽤于创建共表达模块(runSCENIC_1_coexNetwork2modules())。
基因过滤/选择
按每个基因的reads总数进⾏过滤。该filter旨在去除最可能是噪⾳的基因。默认情况下,它(minCountsPerGene)保留所有样品中⾄少带有6个UMIreads的基因(例
如,如果在1%的细胞中以3的值表达,则基因将具有的总数)。
通过基因的细胞数来实现过滤(例如UMI>0,或log2(TPM)>1)。默认情况下(minSamples),保留下来的基因能在⾄少1%的细胞中检测得到。
最后,只保留RcisTarget数据库中可⽤的基因。
#(Adjustminimumvaluesaccordingtoyourdatat)
genesKept<-geneFiltering(exprMat,scenicOptions=scenicOptions,
minCountsPerGene=3*.01*ncol(exprMat),
minSamples=ncol(exprMat)*.01)
在进⾏⽹络推断之前,检查是否有任何已知的相关基因被过滤掉(如果缺少任何相关基因,请仔细检查filter设置是否合适):
interestingGenes<-c("Sox9","Sox10","Dlx5")
interestingGenes[which(!interestingGenes%in%genesKept)]
相关性
GENIE33或者GRNBoost可以检测正负关联。为了区分潜在的激活和抑制,我们将⽬标分为正相关和负相关⽬标(⽐如TF与潜在⽬标之间的Spearman相关性)。
runCorrelation(exprMat_filtered,scenicOptions)
运⾏GENIE3得到潜在转录因⼦TF
##Iflaunchedinanewssion,youwillneedtoreload...
#twd("...")
#loomPath<-"..."
#loom<-open_loom(loomPath,mode="r")
#exprMat<-get_dgem(loom)
#clo_loom(loom)
#genesKept<-loadInt(scenicOptions,"genesKept")
#exprMat_filtered<-exprMat[genesKept,]
#library(SCENIC)
#scenicOptions<-readRDS("int/")
#Optional:addlog(ifitisnotlogged/normalizedalready)
exprMat_filtered<-log2(exprMat_filtered+1)
#RunGENIE3
runGenie3(exprMat_filtered,scenicOptions)
构建并评分GRN(runSCENIC_…)
必要时重新加载表达式矩阵:
loom<-open_loom(loomPath,mode="r")
exprMat<-get_dgem(loom)
clo_loom(loom)
#Optional:logexpression(forTFexpressionplot,itdoesnotaffectanyothercalculation)
logMat<-log2(exprMat+1)
dim(exprMat)
使⽤wrapper函数运⾏其余步骤:
library(SCENIC)
scenicOptions<-readRDS("int/")
scenicOptions@ttings$verbo<-TRUE
scenicOptions@ttings$nCores<-10
scenicOptions@ttings$ed<-123
#Foraveryquickrun:
#coexMethod=c("top5perTarget")
scenicOptions@ttings$dbs<-scenicOptions@ttings$dbs["10kb"]#Fortoyrun
#save...
runSCENIC_1_coexNetwork2modules(scenicOptions)
runSCENIC_2_createRegulons(scenicOptions,coexMethod=c("top5perTarget"))#**Onlyfortoyrun!!只⽤于测试数据
runSCENIC_3_scoreCells(scenicOptions,logMat)
可选步骤
将networkactivity转换成ON/OFF(⼆进制)格式
nPcs<-c(5)#Fortoydatat
#nPcs<-c(5,15,50)
scenicOptions@ttings$ed<-123#sameedforallofthem
#使⽤不同的参数运⾏t-SNE
fileNames<-tsneAUC(scenicOptions,aucType="AUC",nPcs=nPcs,perpl=c(5,15,50))
fileNames<-tsneAUC(scenicOptions,aucType="AUC",nPcs=nPcs,perpl=c(5,15,50),onlyHighConf=TRUE,filePrefix="int/tSNE_oHC")
#画图(individualfilesinint/):
fileNames<-paste0("int/",grep(".Rds",grep("tSNE_",("int"),value=T),value=T))
par(mfrow=c(length(nPcs),3))
fileNames<-paste0("int/",grep(".Rds",grep("tSNE_AUC",("int"),value=T,perl=T),value=T))
plotTsne_compareSettings(fileNames,scenicOptions,showLegend=FALSE,varName="CellType",cex=.5)
#Usingonly"high-confidence"regulons(normallysimilar)
par(mfrow=c(3,3))
fileNames<-paste0("int/",grep(".Rds",grep("tSNE_oHC_AUC",("int"),value=T,perl=T),value=T))
plotTsne_compareSettings(fileNames,scenicOptions,showLegend=FALSE,varName="CellType",cex=.5)
输出到loom/SCope
#DGEM(Digitalgeneexpressionmatrix)
#(non-normalizedcounts)
#exprMat<-get_dgem(open_loom(loomPath))
#dgem<-exprMat
#head(colnames(dgem))#shouldcontaintheCellID/name
#Export:
scenicOptions@fileNames$output["loomFile",]<-"output/mouBrain_"
export2scope(scenicOptions,exprMat)
加载.loom⽂件中的结果
SCopeLoomR中也有函数可以导⼊.loom⽂件中的内容,⽐如调节因⼦,AUC和封装内容(⽐如regulonactivity的t-SNE和UMAP结果)。
library(SCopeLoomR)
scenicLoomPath<-getOutName(scenicOptions,"loomFile")
loom<-open_loom(scenicLoomPath)
#Readinformationfromloomfile:
regulons_incidMat<-get_regulons(loom)
regulons<-regulonsToGeneLists(regulons_incidMat)
regulonsAUC<-get_regulonsAuc(loom)
regulonsAucThresholds<-get_regulonThresholds(loom)
embeddings<-get_embeddings(loom)
解读结果
1.细胞状态
AUCell提供跨细胞的调节⼦的活性,AUCell使⽤“AreaunderCurve曲线下⾯积”(AUC)来计算输⼊基因集的关键⼦集是否在每个细胞的表达基因中富集。通过该调节⼦活
性(连续或⼆进制AUC矩阵)来聚类细胞,我们可以看出是否存在倾向于具有相同调节⼦活性的细胞群,并揭⽰在多个细胞中反复发⽣的⽹络状态。这些状态等同于⽹
络的吸引⼦状态。将这些聚类与不同的可视化⽅法相结合,我们可以探索细胞状态与特定调节⼦的关联。
将AUC和TF表达投射到t-SNE上
logMat<-exprMat#Betterifitislogged/normalized
aucellApp<-plotTsne_AUCellApp(scenicOptions,logMat)#defaultt-SNE
savedSelections<-shiny::runApp(aucellApp)
print(tsneFileName(scenicOptions))
tSNE_scenic<-readRDS(tsneFileName(scenicOptions))
aucell_regulonAUC<-loadInt(scenicOptions,"aucell_regulonAUC")
#ShowTFexpression:
par(mfrow=c(2,3))
AUCell::AUCell_plotTSNE(tSNE_scenic$Y,exprMat,aucell_regulonAUC[onlyNonDuplicatedExtended(rownames(aucell_regulonAUC))[c("Dlx5","Sox10","Sox9","Irf1","Stat6")],],plots="Expression")
#保存AUC图⽚:
Cairo::CairoPDF("output/Step4_BinaryRegulonActivity_tSNE_",width=20,height=15)
par(mfrow=c(4,6))
AUCell::AUCell_plotTSNE(tSNE_scenic$Y,cellsAUC=aucell_regulonAUC,plots="AUC")
()
library(KernSmooth)
library(RColorBrewer)
dens2d<-bkde2D(tSNE_scenic$Y,1)$fhat
image(dens2d,col=(9,"YlOrBr"),axes=FALSE)
contour(dens2d,add=TRUE,nlevels=5,drawlabels=FALSE)
#par(bg="black")
par(mfrow=c(1,2))
regulonNames<-c("Dlx5","Sox10")
cellCol<-plotTsne_rgb(scenicOptions,regulonNames,aucType="AUC",aucMaxContrast=0.6)
text(0,10,attr(cellCol,"red"),col="red",cex=.7,pos=4)
text(-20,-10,attr(cellCol,"green"),col="green3",cex=.7,pos=4)
regulonNames<-list(red=c("Sox10","Sox8"),
green=c("Irf1"),
blue=c("Tef"))
cellCol<-plotTsne_rgb(scenicOptions,regulonNames,aucType="Binary")
text(5,15,attr(cellCol,"red"),col="red",cex=.7,pos=4)
text(5,15-4,attr(cellCol,"green"),col="green3",cex=.7,pos=4)
text(5,15-8,attr(cellCol,"blue"),col="blue",cex=.7,pos=4)
GRN:Regulon靶标和模序
regulons<-loadInt(scenicOptions,"regulons")
regulons[c("Dlx5","Irf1")]
regulons<-loadInt(scenicOptions,"aucell_regulons")
head(cbind(onlyNonDuplicatedExtended(names(regulons))))
regulonTargetsInfo<-loadInt(scenicOptions,"regulonTargetsInfo")
tableSubt<-regulonTargetsInfo[TF=="Stat6"&highConfAnnot==TRUE]
viewMotifs(tableSubt)
2.细胞群的调控因⼦
regulonAUC<-loadInt(scenicOptions,"aucell_regulonAUC")
regulonAUC<-regulonAUC[onlyNonDuplicatedExtended(rownames(regulonAUC)),]
regulonActivity_byCellType<-sapply(split(rownames(cellInfo),cellInfo$CellType),function(cells)rowMeans(getAUC(regulonAUC)[,cells]))
regulonActivity_byCellType_Scaled<-t(scale(t(regulonActivity_byCellType),center=T,scale=T))
pheatmap::pheatmap(regulonActivity_byCellType_Scaled,color=colorRampPalette(c("blue","white","red"))(100),breaks=q(-3,3,=100),treeheight_row=10,treeheight_col=10,border_color=NA)
#filename="regulonActivity_",width=10,height=20)
topRegulators<-reshape2::melt(regulonActivity_byCellType_Scaled)
colnames(topRegulators)<-c("Regulon","CellType","RelativeActivity")
topRegulators<-topRegulators[which(topRegulators$RelativeActivity>0),]
viewTable(topRegulators)
minPerc<-.7
binaryRegulonActivity<-loadInt(scenicOptions,"aucell_binary_nonDupl")
cellInfo_binarizedCells<-cellInfo[which(rownames(cellInfo)%in%colnames(binaryRegulonActivity)),,drop=FALSE]
regulonActivity_byCellType_Binarized<-sapply(split(rownames(cellInfo_binarizedCells),cellInfo_binarizedCells$CellType),function(cells)rowMeans(binaryRegulonActivity[,cells,drop=FALSE]))
binaryActPerc_subt<-regulonActivity_byCellType_Binarized[which(rowSums(regulonActivity_byCellType_Binarized>minPerc)>0),]
pheatmap::pheatmap(binaryActPerc_subt,color=colorRampPalette(c("white","pink","red"))(100),breaks=q(0,1,=100),treeheight_row=10,treeheight_col=10,border_color=NA)
本文发布于:2022-12-28 12:22:02,感谢您对本站的认可!
本文链接:http://www.wtabcd.cn/fanwen/fan/90/46597.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |