数据分析:数据预处理--批次校正(四)
前⾔
加载R包
library(dplyr)
library(tibble)
library(Bioba)
library(limma)
library(ggplot2)
#rm(list=ls())
options(stringsAsFactors=F)
options(e=1000*1024^2)
subgrp<-c("HC","CP","PDAC")
<-c("#568875","#73FAFC","#EE853D")
ExprSet<-readRDS("MS_Protein_")
Function
PCAFun<-function(datat=ExprSet){
#datat=ExprSet
metadata<-pData(datat)
profile<-exprs(datat)
#pca
pca<-prcomp(scale(t(profile),center=T,scale=T))
require(factoextra)
eig<-get_eig(pca)
#explainsvariable
explains<-paste0(paste0("PC",q(2)),"(",paste0(round(eig[1:2,2],2),"%"),")")
#principalcomponentscoreofeachsample
score<-inner_join(pca$x%>%()%>%
dplyr::lect(c(1:2))%>%
rownames_to_column("SampleID"),
metadata%>%rownames_to_column("SampleID"),
by="SampleID")%>%
mutate(SubGroup=factor(SubGroup,levels=subgrp))
#PERMANOVA
require(vegan)
(123)
if(any(profile<0)){
res_adonis<-adonis(vegdist(t(profile),method="manhattan")~metadata$SubGroup,permutations=999)
}el{
res_adonis<-adonis(vegdist(t(profile),method="bray")~metadata$SubGroup,permutations=999)
}
adn_pvalue<-res_adonis[[1]][["Pr(>F)"]][1]
adn_rsquared<-round(res_adonis[[1]][["R2"]][1],3)
signi_label<-paste(cut(adn_pvalue,breaks=c(-Inf,0.001,0.01,0.05,Inf),label=c("***","**","*",".")))
adn_res_format<-bquote(atop(atop("PERMANOVA",R^2==~.(adn_rsquared)),
atop("p-value="~.(adn_pvalue)~.(signi_label),phantom())))
pl<-ggplot(score,aes(x=PC1,y=PC2))+
#geom_point(aes(fill=SubGroup),size=3.5,shape=21,stroke=.8,color="black")+
geom_point(aes(color=SubGroup,shape=Omics),size=2.5)+
stat_ellip(aes(color=SubGroup),level=0.95,linetype=1,size=1.5)+
labs(x=explains[1],y=explains[2])+
scale_color_manual(values=)+
#scale_fill_manual(name="Condition",
#scale_fill_manual(name="Condition",
#values=)+
scale_shape_manual(name="Batch",
values=c(15,16,17))+
annotate("text",x=max(score$PC1)-8,
y=min(score$PC1),
label=adn_res_format,
size=6)+
#guides(color="none")+
theme_bw()+
theme(=element_text(size=10,color="black",face="bold"),
=element_text(size=9,color="black"),
text=element_text(size=8,color="black",family="rif"),
=element_text(size=9,color="black",face="bold"),
=element_blank(),
=element_text(size=11,color="black",family="rif"),
=element_text(size=10,color="black",family="rif"),
on=c(0,0),
ication=c(0,0),
ound=element_rect(color="black",fill="white",linetype=2,size=0.5))
return(pl)
}
RemoveBatchExprSet<-function(datt=ExprSet){
#datt=ExprSet
pheno<-pData(datt)
expr<-exprs(datt)
feature<-fData(datt)
#Removebatcheffects
phen<-pheno%>%arrange(Omics)
prof<-(expr)%>%dplyr::lect(rownames(phen))
if(!any(rownames(phen)==colnames(prof))){
stop("TheOrderofSampleIDinDiscoverySetiswrong")
}
#CA19-9shouldberemovedbatcheffects
prof_CA199<-prof[rownames(prof)=="CA199",,F]
prof_NoCA199<-prof[rownames(prof)!="CA199",,F]
prof_rbe<-removeBatchEffect((prof_NoCA199),batch=phen$Omics)
prof_merge<-rbind(prof_rbe,prof_CA199)
fdf<-new("AnnotatedDataFrame",data=feature)
exprs_dis<-(prof_merge)
adf_dis<-new("AnnotatedDataFrame",data=phen)
experimentData<-new("MIAME",
name="Hua",lab="Lab",
title="tumorExperiment",
abstract="TheMassSpectrometryExpressionSetwithoverlapsamples",
url="",
other=list(notes="TheInterctProteinsandSamples"))
res<-new("ExpressionSet",
exprs=exprs_dis,
phenoData=adf_dis,
featureData=fdf,
experimentData=experimentData)
return(res)
}
RemoveBatchExprSet_Design<-function(datt=ExprSet){
#datt=ExprSet
pheno<-pData(datt)
expr<-exprs(datt)
feature<-fData(datt)
#design
Design<-(~SubGroup,data=pheno)
#Removebatcheffects
phen<-pheno%>%arrange(Omics)
prof<-(expr)%>%dplyr::lect(rownames(phen))
if(!any(rownames(phen)==colnames(prof))){
stop("TheOrderofSampleIDinDiscoverySetiswrong")
}
#CA19-9shouldberemovedbatcheffects
prof_CA199<-prof[rownames(prof)=="CA199",,F]
prof_NoCA199<-prof[rownames(prof)!="CA199",,F]
prof_rbe<-removeBatchEffect((prof_NoCA199),batch=phen$Omics,design=Design)
prof_merge<-rbind(prof_rbe,prof_CA199)
fdf<-new("AnnotatedDataFrame",data=feature)
exprs_dis<-(prof_merge)
adf_dis<-new("AnnotatedDataFrame",data=phen)
experimentData<-new("MIAME",
name="Hua",lab="Lab",
title="tumorExperiment",
abstract="TheMassSpectrometryExpressionSetwithoverlapsamples",
url="",
other=list(notes="TheInterctProteinsandSamples"))
res<-new("ExpressionSet",
exprs=exprs_dis,
phenoData=adf_dis,
featureData=fdf,
experimentData=experimentData)
return(res)
}
#Batchmeancentering
RemoveBatchExprSet_BMC<-function(datt=ExprSet){
#datt=ExprSet
pheno<-pData(datt)
expr<-exprs(datt)
feature<-fData(datt)
#Removebatcheffects
phen<-pheno%>%arrange(Omics)
prof<-(expr)%>%dplyr::lect(rownames(phen))
#CA19-9shouldberemovedbatcheffects
prof_CA199<-prof[rownames(prof)=="CA199",,F]
prof_NoCA199<-prof[rownames(prof)!="CA199",,F]
prof_list<-list()
batchlevels<-unique(phen$Omics)
for(iin1:length(batchlevels)){
prof_list[[i]]<-scale(t(prof_NoCA199%>%
dplyr::lect(rownames(phen[phen$Omics==batchlevels[i],]))),
center=TRUE,scale=FALSE)
}
prof_merge<-rbind(t((rbind,prof_list)),prof_CA199)
if(!any(rownames(phen)==colnames(prof_merge))){
stop("TheOrderofSampleIDinDiscoverySetiswrong")
}
fdf<-new("AnnotatedDataFrame",data=feature)
exprs_dis<-(prof_merge)
adf_dis<-new("AnnotatedDataFrame",data=phen)
experimentData<-new("MIAME",
name="Hua",lab="Lab",
title="tumorExperiment",
abstract="TheMassSpectrometryExpressionSetwithoverlapsamples",
url="",
other=list(notes="TheInterctProteinsandSamples"))
res<-new("ExpressionSet",
exprs=exprs_dis,
phenoData=adf_dis,
phenoData=adf_dis,
featureData=fdf,
experimentData=experimentData)
return(res)
}
#ComBat
RemoveBatchExprSet_ComBat<-function(datt=ExprSet){
#datt=ExprSet
pheno<-pData(datt)
expr<-exprs(datt)
feature<-fData(datt)
#design
Design<-(~SubGroup,data=pheno)
#Removebatcheffects
phen<-pheno%>%arrange(Omics)
prof<-(expr)%>%dplyr::lect(rownames(phen))
#CA19-9shouldberemovedbatcheffects
prof_CA199<-prof[rownames(prof)=="CA199",,F]
prof_NoCA199<-prof[rownames(prof)!="CA199",,F]
prof_combat<-sva::ComBat(prof_NoCA199,batch=phen$Omics,mod=Design,=F,=F)
prof_merge<-rbind(prof_combat,prof_CA199)
if(!any(rownames(phen)==colnames(prof_merge))){
stop("TheOrderofSampleIDinDiscoverySetiswrong")
}
fdf<-new("AnnotatedDataFrame",data=feature)
exprs_dis<-(prof_merge)
adf_dis<-new("AnnotatedDataFrame",data=phen)
experimentData<-new("MIAME",
name="Hua",lab="Lab",
title="tumorExperiment",
abstract="TheMassSpectrometryExpressionSetwithoverlapsamples",
url="",
other=list(notes="TheInterctProteinsandSamples"))
res<-new("ExpressionSet",
exprs=exprs_dis,
phenoData=adf_dis,
featureData=fdf,
experimentData=experimentData)
return(res)
}
Run
ExprSet_RBE<-RemoveBatchExprSet(datt=ExprSet)
ExprSet_RBE_Design<-RemoveBatchExprSet_Design(datt=ExprSet)
ExprSet_RBE_BMC<-RemoveBatchExprSet_BMC(datt=ExprSet)
ExprSet_RBE_ComBat<-RemoveBatchExprSet_ComBat(datt=ExprSet)
cowplot::plot_grid(#PCAFun(datat=ExprSet),
PCAFun(datat=ExprSet_RBE),
PCAFun(datat=ExprSet_RBE_Design),
PCAFun(datat=ExprSet_RBE_BMC),
PCAFun(datat=ExprSet_RBE_ComBat),
align="hv",
ncol=2,
labels=c(#"Origin",
"Batchcorrected",
"BatchcorrectedwithDesign",
"BatchcorrectedwithBMC",
"BatchcorrectedwithComBat"))
systemicinformation
ssionInfo()
Rversion4.0.2(2020-06-22)
Platform:x86_64-conda_cos6-linux-gnu(64-bit)
Runningunder:CentOSLinux8(Core)
Matrixproducts:default
BLAS/LAPACK:/disk/share/anaconda3/lib/
locale:
[1]LC_CTYPE=en_-8LC_NUMERIC=CLC_TIME=en_-8LC_COLLATE=en_-8
[5]LC_MONETARY=en_-8LC_MESSAGES=en_-8LC_PAPER=en_-8LC_NAME=C
[9]LC_ADDRESS=CLC_TELEPHONE=CLC_MEASUREMENT=en_-8LC_IDENTIFICATION=C
attachedbapackages:
[1]parallelstatsgraphicsgrDevicesutilsdatatsmethodsba
otherattachedpackages:
[1]DEP__1.4ggpubr_0.4.0gtsummary_1.4.0vegan_2.5-6
[6]lattice_0.20-45permute_0.9-5factoextra_1.0.7ggrepel_0.9.1.9999sampling_2.8
[11]_1.14.2convert_1.64.0marray_1.68.0limma_3.46.0Bioba_2.50.0
[16]BiocGenerics_0.36.0ggplot2_3.3.5tibble_3.1.6dplyr_1.0.7
loadedviaanamespace(andnotattached):
[1]utf8_1.2.2shinydashboard_0.7.1reticulate_1.18
[4]gmm_1.6-5tidylect_1.1.1htmlwidgets_1.5.4
[7]grid_4.0.2BiocParallel_1.24.1lpSolve_5.6.15
[10]norm_1.0-9.5munll_0.5.0codetools_0.2-18
[13]preprocessCore_1.52.1DT_0.20umap_0.2.7.0
[16]withr_2.4.3colorspace_2.0-2knitr_1.37
[19]rstudioapi_0.13stats4_4.0.2robustba_0.93-6
[22]bayesm_3.1-4ggsignif_0.6.0mzID_1.28.0
[25]MatrixGenerics_1.2.1labeling_0.4.2GenomeInfoDbData_1.2.4
[28]farver_2.1.0vctrs_0.3.8generics_0.1.1
[31]xfun_0.29R6_2.5.1doParallel_1.0.16
[34]GenomeInfoDb_1.26.4clue_0.3-60bitops_1.0-7
[37]DelayedArray_0.16.3asrtthat_0.2.1promis_1.2.0.1
[40]scales_1.1.1gtable_0.3.0Cairo_1.5-12.2
[43]affy_1.68.0sandwich_3.0-0rlang_0.4.12
[46]mzR_2.24.1GlobalOptions_0.1.2splines_4.0.2
[49]rstatix_0.7.0impute_1.64.0broom_0.7.9
[52]BiocManager_1.30.16yaml_2.2.1abind_1.4-5
[55]crosstalk_1.2.0backports_1.4.1httpuv_1.6.4
[58]tensorA_0.36.2tools_4.0.2affyio_1.60.0
[61]ellipsis_0.3.2jquerylib_0.1.4RColorBrewer_1.1-2
[64]MSnba_2.16.1Rcpp_1.0.7plyr_1.8.6
[67]zlibbioc_1.36.0purrr_0.3.4RCurl_1.98-1.3
[67]zlibbioc_1.36.0purrr_0.3.4RCurl_1.98-1.3
[70]openssl_1.4.6GetoptLong_1.0.5cowplot_1.1.0
[73]S4Vectors_0.28.1zoo_1.8-8SummarizedExperiment_1.20.0
[76]haven_2.3.1cluster_2.1.0tinytex_0.36
[79]magrittr_2.0.1RSpectra_0.16-0openxlsx_4.2.3
[82]circlize_0.4.10pcaMethods_1.80.0mvtnorm_1.1-1
[85]ProtGenerics_1.22.0matrixStats_0.61.0xtable_1.8-4
[88]hms_1.1.1mime_0.12evaluate_0.14
[91]XML_3.99-0.6rio_0.5.16readxl_1.3.1
[94]IRanges_2.24.1shape_1.4.5compiler_4.0.2
[97]gt_0.2.2ncdf4_1.17crayon_1.4.2
[100]htmltools_0.5.2mgcv_1.8-38later_1.2.0
[103]tzdb_0.2.0tidyr_1.1.4DBI_1.1.2
[106]ComplexHeatmap_2.6.2MASS_7.3-54tmvtnorm_1.4-10
[109]s_1.3.0compositions_2.0-2Matrix_1.4-0
[112]car_3.0-10readr_2.1.1cli_3.1.0
[115]vsn_3.58.0imputeLCMD_2.0GenomicRanges_1.42.0
[118]forcats_0.5.0pkgconfig_2.0.3foreign_0.8-81
[121]MALDIquant_1.19.3foreach_1.5.1bslib_0.3.1
[124]XVector_0.30.0stringr_1.4.0digest_0.6.29
[127]rmarkdown_2.11cellranger_1.1.0curl_4.3.2
[130]shiny_1.7.1rjson_0.2.20lifecycle_1.0.1
[133]nlme_3.1-150jsonlite_1.7.2carData_3.0-4
[136]askpass_1.1fansi_0.5.0pillar_1.6.4
[139]fastmap_1.1.0DEoptimR_1.0-8survival_3.2-13
[142]glue_1.6.0zip_2.1.1png_0.1-7
[145]iterators_1.0.13stringi_1.4.6sass_0.4.0
Reference
参考⽂章如引起任何侵权问题,可以与我联系,谢谢。
本文发布于:2022-12-28 04:52:09,感谢您对本站的认可!
本文链接:http://www.wtabcd.cn/fanwen/fan/90/44734.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |