生存分析
一.Libraries and data sets
library() # list every library installed on this system
library(survival) # load one library (this can also be done with the mouse from the GUI)
library(help=survival) # see the list of available functions and data sets.
data(aml) # load the aml data set
aml # see the data
# The two libraries needed for the survival-analysis material
library(survival)
library(KMsurv) # contains all the data sets used in our textbook
# To make the columns of a data set usable as ordinary R variables, use attach().
# The console prompts ("> ") were dropped so these lines parse when the file is
# run as a script.
data(aids)    # make sure the aids data set is loaded before attaching it
attach(aids)
infect        # the column is now reachable by its bare name
detach(aids)  # detach again so the column names do not leak into later code
二.Survival Objects
# A survival object is created with the function Surv().
# Usage synopsis (argument description only, not meant to be executed):
#   Surv(time, time2, event, type)
#   - time:  survival time
#   - time2: ending time if it is interval censored
#   - event: censoring variable.
#            For interval censored data, 0=right censored, 1=event at time,
#            2=left censored, 3=interval censored.
#   - type:  indicating whether it is right censored, left censored, interval
#            censored or counting process. The default is right censored or
#            counting process.
# Suppose one observation is the interval [2, 3]; the R statement is then
Surv(time = 2, time2 = 3, event = 3, type = "interval")
# Right-censored data.
# with() evaluates Surv() inside aml, so no attach()/detach() pair is needed
# and the result cannot be affected by same-named variables in the workspace.
with(aml, Surv(time, status))
# Left-truncated and right-censored data.
data(psych)
# Three-argument form: (entry time, exit time, event indicator).
my.surv.object <- with(psych, Surv(age, age + time, death))
my.surv.object
# Printed result as shown in the original notes (abridged):
#  [1] (51,52 ] (58,59 ] (55,57 ] (28,50 ] (21,51+] (19,47 ] (25,57 ]
# [22] (29,63+] (35,65+] (32,67 ] (36,76 ] (32,71+]
三. The Kaplan-Meier Estimates
# survfit() computes survival curves. Its three important arguments are
# formula, conf.int and conf.type.
# Usage synopsis (argument description only, not meant to be executed):
#   survfit(formula, conf.int = 0.95, conf.type = "log")
#   conf.type = "plain"    # linear confidence interval in our book
#   conf.type = "log"      # log transformation applied to H(t)
#   conf.type = "log-log"  # log-transformed interval in our book
# Kaplan-Meier fit of the kidney data. The "~ 1" right-hand side is required
# in recent versions of R.
# NOTE(review): a data set named kidney exists in both survival and KMsurv;
# the status column used here matches the survival version - confirm which one
# is found first on the search path.
myfit <- survfit(Surv(time, status) ~ 1, data = kidney)
attach(kidney)
# Same fit again, this time locating time/status through attach().
myfit <- survfit(Surv(time, status) ~ 1)
a <- summary(survfit(Surv(time, status) ~ 1))
# censored = TRUE is passed to summary() to request the censoring times too.
b <- summary(survfit(Surv(time, status) ~ 1), censored = TRUE)
# Extract the components produced by survfit
a$surv     # outputs the Kaplan-Meier estimate at each t_i
a$time     # {t_i}
a$n.risk   # {Y_i}
a$n.event  # {d_i}
summary(myfit)$std.err  # standard error of the K-M estimate at {t_i}
summary(myfit)$lower    # lower pointwise estimates
summary(myfit)$upper    # upper pointwise estimates
# Plot the survival curve
plot(myfit,
     main = "Kaplan-Meier estimate with 95% confidence bounds",
     xlab = "time", ylab = "survival function")
# Two panels in one column: the curve without bounds, then with 99% bounds.
par(mfcol = c(2, 1))
plot(myfit, conf.int = FALSE,
     main = "Kaplan-Meier estimate with 95% confidence bounds",
     xlab = "time", ylab = "survival function")
plot(survfit(Surv(time, status) ~ 1, conf.int = 0.99),
     main = "Kaplan-Meier estimate with 99% confidence bounds",
     xlab = "time", ylab = "survival function")
# Back to a single-panel layout for later plots.
par(mfrow = c(1, 1))
# When the data contain several groups, the commands below draw the survival
# curves of all groups at once.
# The grouping variable is sex; passing data = kidney avoids attaching the
# data set a second time.
myfit1 <- survfit(Surv(time, status) ~ sex, data = kidney)
plot(myfit1, main = "Kaplan-Meier estimate ", xlab = "time",
     ylab = "survival function for different sex group")
plot(myfit1, conf.int = TRUE, col = c("green", "red"),
     main = "Kaplan-Meier estimate ", xlab = "time",
     ylab = "survival function for different sex group")
# Print the restricted mean survival time with its standard error, together
# with the median time.
print(myfit, print.rmean = TRUE)
四.Nelson-Aalen estimator
# Nelson-Aalen estimate of the cumulative hazard, built from the risk-set
# sizes and event counts reported by summary(myfit).
fit_summary <- summary(myfit)
risk <- fit_summary$n.risk
event <- fit_summary$n.event
# NOTE: this creates a workspace variable named time; from here on a bare
# `time` refers to these event times rather than to a column of an attached
# data set.
time <- fit_summary$time
hazard <- event / risk
n <- length(hazard)
# Running sum of d_i / Y_i; cumsum() also behaves correctly when there is only
# one event time, where a 2:n loop would run backwards.
H <- cumsum(hazard)
plot(time, H, type = "s")
# aml data: cumulative hazard from the first 11 observations.
# survfit() needs a formula, so "~ 1" is added to the Surv object.
# NOTE(review): type = "fh" is kept exactly as in the original notes; confirm
# how the installed survival version interprets this abbreviation.
my <- summary(survfit(Surv(aml$time[1:11], aml$status[1:11]) ~ 1, type = "fh"))
# Event times alongside the cumulative hazard, -log(S(t)).
list(my$time, -log(my$surv))
五. Log-rank test
# Log-rank test comparing the two sexes, then the same test stratified by ulc.
# status == 1 marks the event of interest.
# NOTE(review): melanom is not loaded anywhere in the visible code; presumably
# it comes from another package (e.g. ISwR) - confirm and load it first.
# data = melanom replaces attach(melanom) so its columns do not mask others.
survdiff(Surv(days, status == 1) ~ sex, data = melanom)
survdiff(Surv(days, status == 1) ~ sex + strata(ulc), data = melanom)
# Paired data on childhood acute leukemia, section 1.2
data(drug6mp)
# NOTE(review): nothing below needs the attach; it is kept in case later code
# relies on the bare column names.
attach(drug6mp)
placebo <- drug6mp[, c("remstat", "pair", "t1")]
drug <- drug6mp[, c("pair", "remstat", "t2", "relapse")]
# Unify the variable names of the two groups
colnames(placebo)[3] <- "time"
colnames(drug)[3] <- "time"
colnames(drug)[4] <- "censor"
# Add the censoring variable for the placebo group: indicator 1 for every row.
# nrow() replaces the hard-coded 21 so the length always matches the data.
censor <- rep(1, nrow(placebo))