loom

更新时间:2022-12-28 12:22:02 阅读: 评论:0


2022年12月28日发(作者:像素论坛)

SENIC的使⽤

软件介绍

SENIC是⼀种同时重建基因调控⽹络并从单细胞RNA-q数据中鉴定stablecellstates的⼯具。基于共表达和DNA模基序(motif)分析推断基因调控⽹络,然后在每个

细胞中分析⽹络活性以鉴定细胞状态

输⼊:SCENIC需要输⼊的是单细胞RNA-q表达矩阵——每列对应于样品(细胞),每⾏对应⼀个基因。基因ID应该是gene-symbol并存储为rownames(尤其是基因

名字部分是为了与RcisTarget数据库兼容);表达数据是Gene的readscount。根据作者的测试,提供原始的或NormalizedUMIcount,⽆论是否log转换,或使⽤TPM

值,结果相差不⼤。

软件的安装

if(!requireNamespace("BiocManager",quietly=TRUE))es("BiocManager")

BiocManager::install(c("AUCell","RcisTarget"))

BiocManager::install(c("GENIE3"))

BiocManager::install(c("zoo","mixtools","rbokeh"))

BiocManager::install(c("DT","NMF","pheatmap","R2HTML","Rtsne"))

BiocManager::install(c("doMC","doRNG"))

BiocManager::install(c("SingleCellExperiment"))

if(!requireNamespace("devtools",quietly=TRUE))es("devtools")

devtools::install_github("aertslab/SCopeLoomR",build_vignettes=TRUE)

if(!requireNamespace("devtools",quietly=TRUE))es("devtools")

devtools::install_github("aertslab/SCENIC")

packageVersion("SCENIC")

下载评分数据库

ForHuman,Mou,Fly

mm_url="/cistarget/databas/mus_musculus/mm9/refq_r45/mc9nr/gene_bad/r"

mm_url2="/cistarget/databas/mus_musculus/mm9/refq_r45/mc9nr/gene_bad/r"

hg_url="/cistarget/databas/homo_sapiens/hg19/refq_r45/mc9nr/gene_bad/r"

hg_url2="/cistarget/databas/homo_sapiens/hg19/refq_r45/mc9nr/gene_bad/r"

fly_url="/cistarget/databas/drosophila_melanogaster/dm6/flyba_r6.02/mc8nr/gene_bad/r"

wget-c$mm_url

wget-c$mm_url2

wget-c$hg_url

wget-c$hg_url2

wget-c$fly_url

不同数据格式的读⼊

对于loom⽂件

("/clone/Previously%20Published/","")

loomPath<-""

10x的输出⽂件

singleCellMatrix<-Seurat::Read10X(="data/pbmc3k/filtered_gene_bc_matrices/hg19/")

cellInfo<-(uratCluster=Idents(uratObject))

Robjects(,SingleCellExperiment)

sce<-load_as_sce(dataPath)#anySingleCellExperimentobject

exprMat<-counts(sce)

cellInfo<-colData(sce)

简单的SENIC⼯作流程

twd("/media/sdb/project/20200223/SCENIC_MouBrain")

loomPath<-(package="SCENIC","examples/mouBrain_")

library(SCopeLoomR)

loom<-open_loom(loomPath)

exprMat<-get_dgem(loom)

cellInfo<-get_cellAnnotation(loom)

clo_loom(loom)

#查看矩阵⼤⼩

#dim(exprMat)

library(SCENIC)

#scenicOptions<-initializeScenic(org="mgi",dbDir="cisTarget_databas",nCores=10)

scenicOptions<-initializeScenic(org="mgi",dbDir="/media/sdb/project/20200223/data",nCores=10)

saveRDS(scenicOptions,file="int/")

###Co-expressionnetwork

genesKept<-geneFiltering(exprMat,scenicOptions)

exprMat_filtered<-exprMat[genesKept,]

runCorrelation(exprMat_filtered,scenicOptions)

exprMat_filtered_log<-log2(exprMat_filtered+1)

runGenie3(exprMat_filtered_log,scenicOptions)

###BuildandscoretheGRN

exprMat_log<-log2(exprMat+1)

scenicOptions@ttings$dbs<-scenicOptions@ttings$dbs["10kb"]#Toyrunttings

runSCENIC_1_coexNetwork2modules(scenicOptions)

runSCENIC_2_createRegulons(scenicOptions,coexMethod=c("top5perTarget"))#Toyrunttings

runSCENIC_3_scoreCells(scenicOptions,exprMat_log)

#Export:运⾏这个时可能报错

#export2scope(scenicOptions,exprMat)

#Binarizeactivity?

#aucellApp<-plotTsne_AUCellApp(scenicOptions,exprMat_log)

#savedSelections<-shiny::runApp(aucellApp)

#newThresholds<-savedSelections$thresholds

#scenicOptions@fileNames$int["aucell_thresholds",1]<-"int/"

#saveRDS(newThresholds,file=getIntName(scenicOptions,"aucell_thresholds"))

#saveRDS(scenicOptions,file="int/")

runSCENIC_4_aucell_binarize(scenicOptions)

###Exploringoutput

#Checkfilesinfolder'output'

#.loomfile@

#output/Step2_MotifEnrichment_detail/subt:

motifEnrichment_lfMotifs_wGenes<-loadInt(scenicOptions,"motifEnrichment_lfMotifs_wGenes")

tableSubt<-motifEnrichment_lfMotifs_wGenes[highlightedTFs=="Sox8"]

viewMotifs(tableSubt)

#output/Step2_etail:

regulonTargetsInfo<-loadInt(scenicOptions,"regulonTargetsInfo")

tableSubt<-regulonTargetsInfo[TF=="Stat6"&highConfAnnot==TRUE]

viewMotifs(tableSubt)

运⾏SENIC

建⽴基因调控⽹络(GeneRegulationNetwork,GRN):

1.基于共表达识别每个转录因⼦TF的潜在靶标。过滤表达矩阵并运⾏GENIE3或者GRNBoost,它们是利⽤表达矩阵推断基因调控⽹络的⼀种算法,能得到转录因⼦

和潜在靶标的相关性⽹络;将⽬标从GENIE3或者GRNBoost格式转为共表达模块。

2.根据DNA模序分析(motif)选择潜在的直接结合靶标(调节因⼦)(利⽤RcisTarget包:TF基序分析)

确定细胞状态及其调节因⼦:

3.分析每个细胞中的⽹络活性(AUCell)在细胞中评分调节⼦(计算AUC)

SCENIC完整流程

导⼊数据

loomPath<-(package="SCENIC","examples/mouBrain_")

library(SCopeLoomR)

loom<-open_loom(loomPath)#mode='r'如果报错可以加上

exprMat<-get_dgem(loom)

cellInfo<-get_cellAnnotation(loom)

clo_loom(loom)

Initializettings初始设置,导⼊评分数据库

library(SCENIC)

#先下载数据库,org⽤来选择物种,这⾥选择的是⼩⿏

scenicOptions<-initializeScenic(org="mgi",dbDir="cisTarget_databas",nCores=10)

#scenicOptions@inputDatatInfo$cellInfo<-"int/"

saveRDS(scenicOptions,file="int/")

共表达⽹络

根据已有的表达矩阵推断潜在的转录因⼦靶标,使⽤GENIE3或GRNBoost。⾸先需要进⾏基因的过滤。

genesKept<-geneFiltering(exprMat,scenicOptions=scenicOptions,

minCountsPerGene=3*.01*ncol(exprMat),

minSamples=ncol(exprMat)*.01)

过滤表达矩阵,保留只有过滤后的基因

exprMat_filtered<-exprMat[genesKept,]

计算相关性,这⼀步时间会⽐较长

runCorrelation(exprMat_filtered,scenicOptions)

exprMat_filtered_log<-log2(exprMat_filtered+1)

runGenie3(exprMat_filtered_log,scenicOptions)

BuildandscoretheGRN构建并给基因调控⽹络(GRN)打分

exprMat_log<-log2(exprMat+1)

scenicOptions@ttings$dbs<-scenicOptions@ttings$dbs["10kb"]#Toyrunttings

runSCENIC_1_coexNetwork2modules(scenicOptions)

runSCENIC_2_createRegulons(scenicOptions,coexMethod=c("top5perTarget"))#Toyrunttings

runSCENIC_3_scoreCells(scenicOptions,exprMat_log)

输⼊表达矩阵

在本教程中,我们提供了⼀个⽰例,样本是⼩⿏⼤脑的200个细胞和862个基因:

loomPath<-(package="SCENIC","examples/mouBrain_")

打开loom⽂件并加载表达矩阵;

library(SCopeLoomR)

loom<-open_loom(loomPath,mode="r")

exprMat<-get_dgem(loom)

cellInfo<-get_cellAnnotation(loom)

clo_loom(loom)

#dim(exprMat)

细胞信息/表型

#cellInfo$nGene<-colSums(exprMat>0)

head(cellInfo)

cellInfo<-(cellInfo)

cellTypeColumn<-"Class"

colnames(cellInfo)[which(colnames(cellInfo)==cellTypeColumn)]<-"CellType"

cbind(table(cellInfo$CellType))

saveRDS(cellInfo,file="int/")

#Colortoassigntothevariables(sameformatasforNMF::aheatmap)

colVars<-list(CellType=c("microglia"="forestgreen",

"endothelial-mural"="darkorange",

"astrocytes_ependymal"="magenta4",

"oligodendrocytes"="hotpink",

"interneurons"="red3",

"pyramidalCA1"="skyblue",

"pyramidaSS"="darkblue"

))

colVars$CellType<-colVars$CellType[interct(names(colVars$CellType),cellInfo$CellType)]

saveRDS(colVars,file="int/")

()

legend(0,1,fill=colVars$CellType,legend=names(colVars$CellType))

初始化SCENIC设置

为了在SCENIC的多个步骤中保持设置⼀致,SCENIC包中的⼤多数函数使⽤⼀个公共对象,该对象存储当前运⾏的选项并代替⼤多数函数的“参数”。⽐如下⾯

的org,dbDir等,可以在开始就将物种rog(mgi——mou,hgnc——human,dmel——fly)和RcisTarge数据库位置分别读给对象org,dbDir,之后统⼀⽤函

数initializeScenic得到对象scenicOptions。具体参数设置可以⽤?initializeScenichelp⼀下。

library(SCENIC)

org="mgi"#orhgnc,or

dmeldbDir="cisTarget_databas"#RcisTargetdatabaslocation

myDatatTitle="SCENICexampleonMoubrain"#chooanameforyouranalysis

data(defaultDbNames)

dbs<-defaultDbNames[[org]]

scenicOptions<-initializeScenic(org=org,dbDir=dbDir,dbs=dbs,datatTitle=myDatatTitle,nCores=10)

#如果有需要就修改这个地⽅

scenicOptions@inputDatatInfo$cellInfo<-"int/

"scenicOptions@inputDatatInfo$colVars<-"int/"

#Databas:

#scenicOptions@ttings$dbs<-c("mm9-5kb-mc8nr"="r")

#scenicOptions@ttings$db_mcVersion<-"v8"

#Savetouatalatertime...

saveRDS(scenicOptions,file="int/")

共表达⽹络

SCENIC⼯作流程的第⼀步是根据表达数据推断潜在的转录因⼦靶标。为此,我们使⽤GENIE3或GRNBoost,输⼊⽂件是表达矩阵(过滤后的)和转录因⼦列表。

GENIE3/GRBBoost的输出结果和相关矩阵将⽤于创建共表达模块(runSCENIC_1_coexNetwork2modules())。

基因过滤/选择

按每个基因的reads总数进⾏过滤。该filter旨在去除最可能是噪⾳的基因。默认情况下,它(minCountsPerGene)保留所有样品中⾄少带有6个UMIreads的基因(例

如,如果在1%的细胞中以3的值表达,则基因将具有的总数)。

通过基因的细胞数来实现过滤(例如UMI>0,或log2(TPM)>1)。默认情况下(minSamples),保留下来的基因能在⾄少1%的细胞中检测得到。

最后,只保留RcisTarget数据库中可⽤的基因。

#(Adjustminimumvaluesaccordingtoyourdatat)

genesKept<-geneFiltering(exprMat,scenicOptions=scenicOptions,

minCountsPerGene=3*.01*ncol(exprMat),

minSamples=ncol(exprMat)*.01)

在进⾏⽹络推断之前,检查是否有任何已知的相关基因被过滤掉(如果缺少任何相关基因,请仔细检查filter设置是否合适):

interestingGenes<-c("Sox9","Sox10","Dlx5")

interestingGenes[which(!interestingGenes%in%genesKept)]

相关性

GENIE33或者GRNBoost可以检测正负关联。为了区分潜在的激活和抑制,我们将⽬标分为正相关和负相关⽬标(⽐如TF与潜在⽬标之间的Spearman相关性)。

runCorrelation(exprMat_filtered,scenicOptions)

运⾏GENIE3得到潜在转录因⼦TF

##Iflaunchedinanewssion,youwillneedtoreload...

#twd("...")

#loomPath<-"..."

#loom<-open_loom(loomPath,mode="r")

#exprMat<-get_dgem(loom)

#clo_loom(loom)

#genesKept<-loadInt(scenicOptions,"genesKept")

#exprMat_filtered<-exprMat[genesKept,]

#library(SCENIC)

#scenicOptions<-readRDS("int/")

#Optional:addlog(ifitisnotlogged/normalizedalready)

exprMat_filtered<-log2(exprMat_filtered+1)

#RunGENIE3

runGenie3(exprMat_filtered,scenicOptions)

构建并评分GRN(runSCENIC_…)

必要时重新加载表达式矩阵:

loom<-open_loom(loomPath,mode="r")

exprMat<-get_dgem(loom)

clo_loom(loom)

#Optional:logexpression(forTFexpressionplot,itdoesnotaffectanyothercalculation)

logMat<-log2(exprMat+1)

dim(exprMat)

使⽤wrapper函数运⾏其余步骤:

library(SCENIC)

scenicOptions<-readRDS("int/")

scenicOptions@ttings$verbo<-TRUE

scenicOptions@ttings$nCores<-10

scenicOptions@ttings$ed<-123

#Foraveryquickrun:

#coexMethod=c("top5perTarget")

scenicOptions@ttings$dbs<-scenicOptions@ttings$dbs["10kb"]#Fortoyrun

#save...

runSCENIC_1_coexNetwork2modules(scenicOptions)

runSCENIC_2_createRegulons(scenicOptions,coexMethod=c("top5perTarget"))#**Onlyfortoyrun!!只⽤于测试数据

runSCENIC_3_scoreCells(scenicOptions,logMat)

可选步骤

将networkactivity转换成ON/OFF(⼆进制)格式

nPcs<-c(5)#Fortoydatat

#nPcs<-c(5,15,50)

scenicOptions@ttings$ed<-123#sameedforallofthem

#使⽤不同的参数运⾏t-SNE

fileNames<-tsneAUC(scenicOptions,aucType="AUC",nPcs=nPcs,perpl=c(5,15,50))

fileNames<-tsneAUC(scenicOptions,aucType="AUC",nPcs=nPcs,perpl=c(5,15,50),onlyHighConf=TRUE,filePrefix="int/tSNE_oHC")

#画图(individualfilesinint/):

fileNames<-paste0("int/",grep(".Rds",grep("tSNE_",("int"),value=T),value=T))

par(mfrow=c(length(nPcs),3))

fileNames<-paste0("int/",grep(".Rds",grep("tSNE_AUC",("int"),value=T,perl=T),value=T))

plotTsne_compareSettings(fileNames,scenicOptions,showLegend=FALSE,varName="CellType",cex=.5)

#Usingonly"high-confidence"regulons(normallysimilar)

par(mfrow=c(3,3))

fileNames<-paste0("int/",grep(".Rds",grep("tSNE_oHC_AUC",("int"),value=T,perl=T),value=T))

plotTsne_compareSettings(fileNames,scenicOptions,showLegend=FALSE,varName="CellType",cex=.5)

输出到loom/SCope

#DGEM(Digitalgeneexpressionmatrix)

#(non-normalizedcounts)

#exprMat<-get_dgem(open_loom(loomPath))

#dgem<-exprMat

#head(colnames(dgem))#shouldcontaintheCellID/name

#Export:

scenicOptions@fileNames$output["loomFile",]<-"output/mouBrain_"

export2scope(scenicOptions,exprMat)

加载.loom⽂件中的结果

SCopeLoomR中也有函数可以导⼊.loom⽂件中的内容,⽐如调节因⼦,AUC和封装内容(⽐如regulonactivity的t-SNE和UMAP结果)。

library(SCopeLoomR)

scenicLoomPath<-getOutName(scenicOptions,"loomFile")

loom<-open_loom(scenicLoomPath)

#Readinformationfromloomfile:

regulons_incidMat<-get_regulons(loom)

regulons<-regulonsToGeneLists(regulons_incidMat)

regulonsAUC<-get_regulonsAuc(loom)

regulonsAucThresholds<-get_regulonThresholds(loom)

embeddings<-get_embeddings(loom)

解读结果

1.细胞状态

AUCell提供跨细胞的调节⼦的活性,AUCell使⽤“AreaunderCurve曲线下⾯积”(AUC)来计算输⼊基因集的关键⼦集是否在每个细胞的表达基因中富集。通过该调节⼦活

性(连续或⼆进制AUC矩阵)来聚类细胞,我们可以看出是否存在倾向于具有相同调节⼦活性的细胞群,并揭⽰在多个细胞中反复发⽣的⽹络状态。这些状态等同于⽹

络的吸引⼦状态。将这些聚类与不同的可视化⽅法相结合,我们可以探索细胞状态与特定调节⼦的关联。

将AUC和TF表达投射到t-SNE上

logMat<-exprMat#Betterifitislogged/normalized

aucellApp<-plotTsne_AUCellApp(scenicOptions,logMat)#defaultt-SNE

savedSelections<-shiny::runApp(aucellApp)

print(tsneFileName(scenicOptions))

tSNE_scenic<-readRDS(tsneFileName(scenicOptions))

aucell_regulonAUC<-loadInt(scenicOptions,"aucell_regulonAUC")

#ShowTFexpression:

par(mfrow=c(2,3))

AUCell::AUCell_plotTSNE(tSNE_scenic$Y,exprMat,aucell_regulonAUC[onlyNonDuplicatedExtended(rownames(aucell_regulonAUC))[c("Dlx5","Sox10","Sox9","Irf1","Stat6")],],plots="Expression")

#保存AUC图⽚:

Cairo::CairoPDF("output/Step4_BinaryRegulonActivity_tSNE_",width=20,height=15)

par(mfrow=c(4,6))

AUCell::AUCell_plotTSNE(tSNE_scenic$Y,cellsAUC=aucell_regulonAUC,plots="AUC")

()

library(KernSmooth)

library(RColorBrewer)

dens2d<-bkde2D(tSNE_scenic$Y,1)$fhat

image(dens2d,col=(9,"YlOrBr"),axes=FALSE)

contour(dens2d,add=TRUE,nlevels=5,drawlabels=FALSE)

#par(bg="black")

par(mfrow=c(1,2))

regulonNames<-c("Dlx5","Sox10")

cellCol<-plotTsne_rgb(scenicOptions,regulonNames,aucType="AUC",aucMaxContrast=0.6)

text(0,10,attr(cellCol,"red"),col="red",cex=.7,pos=4)

text(-20,-10,attr(cellCol,"green"),col="green3",cex=.7,pos=4)

regulonNames<-list(red=c("Sox10","Sox8"),

green=c("Irf1"),

blue=c("Tef"))

cellCol<-plotTsne_rgb(scenicOptions,regulonNames,aucType="Binary")

text(5,15,attr(cellCol,"red"),col="red",cex=.7,pos=4)

text(5,15-4,attr(cellCol,"green"),col="green3",cex=.7,pos=4)

text(5,15-8,attr(cellCol,"blue"),col="blue",cex=.7,pos=4)

GRN:Regulon靶标和模序

regulons<-loadInt(scenicOptions,"regulons")

regulons[c("Dlx5","Irf1")]

regulons<-loadInt(scenicOptions,"aucell_regulons")

head(cbind(onlyNonDuplicatedExtended(names(regulons))))

regulonTargetsInfo<-loadInt(scenicOptions,"regulonTargetsInfo")

tableSubt<-regulonTargetsInfo[TF=="Stat6"&highConfAnnot==TRUE]

viewMotifs(tableSubt)

2.细胞群的调控因⼦

regulonAUC<-loadInt(scenicOptions,"aucell_regulonAUC")

regulonAUC<-regulonAUC[onlyNonDuplicatedExtended(rownames(regulonAUC)),]

regulonActivity_byCellType<-sapply(split(rownames(cellInfo),cellInfo$CellType),function(cells)rowMeans(getAUC(regulonAUC)[,cells]))

regulonActivity_byCellType_Scaled<-t(scale(t(regulonActivity_byCellType),center=T,scale=T))

pheatmap::pheatmap(regulonActivity_byCellType_Scaled,color=colorRampPalette(c("blue","white","red"))(100),breaks=q(-3,3,=100),treeheight_row=10,treeheight_col=10,border_color=NA)

#filename="regulonActivity_",width=10,height=20)

topRegulators<-reshape2::melt(regulonActivity_byCellType_Scaled)

colnames(topRegulators)<-c("Regulon","CellType","RelativeActivity")

topRegulators<-topRegulators[which(topRegulators$RelativeActivity>0),]

viewTable(topRegulators)

minPerc<-.7

binaryRegulonActivity<-loadInt(scenicOptions,"aucell_binary_nonDupl")

cellInfo_binarizedCells<-cellInfo[which(rownames(cellInfo)%in%colnames(binaryRegulonActivity)),,drop=FALSE]

regulonActivity_byCellType_Binarized<-sapply(split(rownames(cellInfo_binarizedCells),cellInfo_binarizedCells$CellType),function(cells)rowMeans(binaryRegulonActivity[,cells,drop=FALSE]))

binaryActPerc_subt<-regulonActivity_byCellType_Binarized[which(rowSums(regulonActivity_byCellType_Binarized>minPerc)>0),]

pheatmap::pheatmap(binaryActPerc_subt,color=colorRampPalette(c("white","pink","red"))(100),breaks=q(0,1,=100),treeheight_row=10,treeheight_col=10,border_color=NA)

本文发布于:2022-12-28 12:22:02,感谢您对本站的认可!

本文链接:http://www.wtabcd.cn/fanwen/fan/90/46597.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

下一篇:revolving
标签:loom
相关文章
留言与评论(共有 0 条评论)
   
验证码:
Copyright ©2019-2022 Comsenz Inc.Powered by © 专利检索| 网站地图