##packages
Packages <- c("mgcv", "ggplot2","moonBook","ztable","survival","ggGam")
lapply(Packages, library, character.only = TRUE)
##import original data
dd <-read.csv("C:/Users/koho0/Desktop/survival.csv",header=T)
#data inspection
str(dd)
## 'data.frame': 60 obs. of 6 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ time : int 760 742 556 560 620 1440 382 530 790 320 ...
## $ status: int 1 1 1 1 1 0 1 1 1 1 ...
## $ group : int 4 1 3 3 4 2 1 2 4 1 ...
## $ age : int 75 48 74 53 36 66 22 25 59 70 ...
## $ sex : int 0 1 0 0 1 0 1 1 0 1 ...
sum(is.na(dd))
## [1] 0
dd$group <- as.factor(dd$group)
dd$sex <- as.factor(dd$sex)
dd$status <- as.numeric(dd$status)
mytable(dd,method=3)
##
## Descriptive Statistics
## ----------------------------------
## N Total
## ----------------------------------
## id 60 30.5 [15.5;45.5]
## time 60 558.0 [381.0;747.5]
## status 60
## - 0 7 (11.7%)
## - 1 53 (88.3%)
## group 60
## - 1 16 (26.7%)
## - 2 13 (21.7%)
## - 3 15 (25.0%)
## - 4 16 (26.7%)
## age 60 58.0 [49.0;66.0]
## sex 60
## - 0 37 (61.7%)
## - 1 23 (38.3%)
## ----------------------------------
b0 <- gam(time ~ group + sex +s(age), weights=status, family=cox.ph, data=dd, method="REML")
anova(b0)
##
## Family: Cox PH
## Link function: identity
##
## Formula:
## time ~ group + sex + s(age)
##
## Parametric Terms:
## df Chi.sq p-value
## group 3 8.999 0.0293
## sex 1 0.264 0.6074
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(age) 1 1 3.574 0.0587
par(mfrow=c(2,2))
gam.check(b0)
##
## Method: REML Optimizer: outer newton
## full convergence after 10 iterations.
## Gradient range [-4.065561e-05,-4.065561e-05]
## (score 175.1801 & scale 1).
## Hessian positive definite, eigenvalue range [4.064758e-05,4.064758e-05].
## Model rank = 13 / 13
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(age) 9 1 1.09 0.82
par(mfrow=c(1,1))
summary(b0)
##
## Family: Cox PH
## Link function: identity
##
## Formula:
## time ~ group + sex + s(age)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## group2 -0.6624 0.4209 -1.574 0.115
## group3 0.0725 0.3932 0.184 0.854
## group4 -0.9586 0.3858 -2.484 0.013 *
## sex1 0.1618 0.3149 0.514 0.607
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(age) 1 1 3.574 0.0587 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Deviance explained = 2.43%
## -REML = 175.18 Scale est. = 1 n = 60
df=data.frame(x=b0$linear.predictors,y=residuals(b0))
ggplot(df,aes(x=x,y=y))+geom_point()+
labs(x="linear predictor",y="residuals")+theme_bw()
summary(b0)
##
## Family: Cox PH
## Link function: identity
##
## Formula:
## time ~ group + sex + s(age)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## group2 -0.6624 0.4209 -1.574 0.115
## group3 0.0725 0.3932 0.184 0.854
## group4 -0.9586 0.3858 -2.484 0.013 *
## sex1 0.1618 0.3149 0.514 0.607
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(age) 1 1 3.574 0.0587 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Deviance explained = 2.43%
## -REML = 175.18 Scale est. = 1 n = 60
plot(b0,pages=1,seWithMean = TRUE,shift=coef(b0)[1],shade= TRUE)
drawSurv=function(model,data,np=100,timevar="time",until=NULL,id=list()){
if(is.null(until)) until=max(model$model[[timevar]],na.rm=TRUE)
if(length(id)==0) id=list(id=1:nrow(data))
for( i in 1:nrow(data)){
newd <- data.frame(matrix(0,np,0))
for (n in names(data)) newd[[n]] <- rep(data[[n]][i],np)
newd$time <- seq(0,until,length=np)
fv <- predict(model,newdata=newd,type="response",se=TRUE)
newd$fit=fv$fit
# newd$ymax=fv$fit+se*fv$se.fit
# newd$ymin=fv$fit-se*fv$se.fit
se <- fv$se.fit/fv$fit
newd$ymax=exp(log(fv$fit)+se)
newd$ymin=exp(log(fv$fit)-se)
idname=names(id)[1]
newd[[idname]]=id[[1]][i]
if(i==1){
final=newd
} else{
final=rbind(final,newd)
}
}
final[[idname]]=factor(final[[idname]])
final
ggplot(data=final,aes_string(x="time",y="fit",fill=idname,group=idname))+
geom_line(aes_string(color=idname))+
geom_ribbon(aes_string(ymax="ymax",ymin="ymin"),alpha=0.3)+
ylim(c(0,1)) + ylab("cumulative survival")+xlab("days")+
theme_bw()+
theme(legend.position = "top")
}
averageData=function(data,newValue=list()){
newd=list()
for(i in 1:ncol(data)){
if(is.numeric(data[[i]])) {
newd[[i]]=mean(data[[i]],na.rm=TRUE)
} else if(is.factor(data[[i]])){
newd[[i]]=levels(data[[i]])[1]
} else{
newd[[i]]=sort(unique(data[[i]]))[1]
}
}
names(newd)=names(data)
df=as.data.frame(newd)
df
if(length(newValue)>0){
no=length(newValue[[1]])
for(i in 1:no){
if(i==1) {
final=df
} else{
final=rbind(final,df)
}
}
final[[names(newValue)[1]]]=newValue[[1]]
df=final
}
df
}
df=averageData(dd,list(group=c("1","2","3","4")))
drawSurv(b0,data=df,id=list(group=c("1","2","3","4")))