GAM practice

##packages
Packages <- c("mgcv", "ggplot2","moonBook","ztable","survival","ggGam")

lapply(Packages, library, character.only = TRUE)

##import original data
dd <-read.csv("C:/Users/koho0/Desktop/survival.csv",header=T)

#data inspection

str(dd)

## 'data.frame':    60 obs. of  6 variables:
##  $ id    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ time  : int  760 742 556 560 620 1440 382 530 790 320 ...
##  $ status: int  1 1 1 1 1 0 1 1 1 1 ...
##  $ group : int  4 1 3 3 4 2 1 2 4 1 ...
##  $ age   : int  75 48 74 53 36 66 22 25 59 70 ...
##  $ sex   : int  0 1 0 0 1 0 1 1 0 1 ...

sum(is.na(dd))

## [1] 0

dd$group <- as.factor(dd$group) 
dd$sex <- as.factor(dd$sex) 
dd$status <- as.numeric(dd$status)

mytable(dd,method=3)

## 
##       Descriptive Statistics     
## ---------------------------------- 
##           N          Total        
## ---------------------------------- 
##  id      60     30.5  [15.5;45.5] 
##  time    60  558.0  [381.0;747.5] 
##  status  60                       
##    - 0                 7  (11.7%) 
##    - 1                53  (88.3%) 
##  group   60                       
##    - 1                16  (26.7%) 
##    - 2                13  (21.7%) 
##    - 3                15  (25.0%) 
##    - 4                16  (26.7%) 
##  age     60     58.0  [49.0;66.0] 
##  sex     60                       
##    - 0                37  (61.7%) 
##    - 1                23  (38.3%) 
## ----------------------------------

b0 <- gam(time ~ group + sex +s(age), weights=status, family=cox.ph, data=dd, method="REML")

anova(b0)

## 
## Family: Cox PH 
## Link function: identity 
## 
## Formula:
## time ~ group + sex + s(age)
## 
## Parametric Terms:
##       df Chi.sq p-value
## group  3  8.999  0.0293
## sex    1  0.264  0.6074
## 
## Approximate significance of smooth terms:
##        edf Ref.df Chi.sq p-value
## s(age)   1      1  3.574  0.0587

par(mfrow=c(2,2))
gam.check(b0)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 10 iterations.
## Gradient range [-4.065561e-05,-4.065561e-05]
## (score 175.1801 & scale 1).
## Hessian positive definite, eigenvalue range [4.064758e-05,4.064758e-05].
## Model rank =  13 / 13 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##        k' edf k-index p-value
## s(age)  9   1    1.09    0.82

par(mfrow=c(1,1))
summary(b0)

## 
## Family: Cox PH 
## Link function: identity 
## 
## Formula:
## time ~ group + sex + s(age)
## 
## Parametric coefficients:
##        Estimate Std. Error z value Pr(>|z|)  
## group2  -0.6624     0.4209  -1.574    0.115  
## group3   0.0725     0.3932   0.184    0.854  
## group4  -0.9586     0.3858  -2.484    0.013 *
## sex1     0.1618     0.3149   0.514    0.607  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##        edf Ref.df Chi.sq p-value  
## s(age)   1      1  3.574  0.0587 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Deviance explained = 2.43%
## -REML = 175.18  Scale est. = 1         n = 60

df=data.frame(x=b0$linear.predictors,y=residuals(b0))
ggplot(df,aes(x=x,y=y))+geom_point()+
    labs(x="linear predictor",y="residuals")+theme_bw()

summary(b0)

## 
## Family: Cox PH 
## Link function: identity 
## 
## Formula:
## time ~ group + sex + s(age)
## 
## Parametric coefficients:
##        Estimate Std. Error z value Pr(>|z|)  
## group2  -0.6624     0.4209  -1.574    0.115  
## group3   0.0725     0.3932   0.184    0.854  
## group4  -0.9586     0.3858  -2.484    0.013 *
## sex1     0.1618     0.3149   0.514    0.607  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##        edf Ref.df Chi.sq p-value  
## s(age)   1      1  3.574  0.0587 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Deviance explained = 2.43%
## -REML = 175.18  Scale est. = 1         n = 60

plot(b0,pages=1,seWithMean = TRUE,shift=coef(b0)[1],shade= TRUE)

drawSurv=function(model,data,np=100,timevar="time",until=NULL,id=list()){
    
    if(is.null(until)) until=max(model$model[[timevar]],na.rm=TRUE)
    
    if(length(id)==0) id=list(id=1:nrow(data))
    
    for( i in 1:nrow(data)){
        newd <- data.frame(matrix(0,np,0))
        for (n in names(data)) newd[[n]] <- rep(data[[n]][i],np)
        newd$time <- seq(0,until,length=np)
        fv <- predict(model,newdata=newd,type="response",se=TRUE)
        newd$fit=fv$fit
        # newd$ymax=fv$fit+se*fv$se.fit
        # newd$ymin=fv$fit-se*fv$se.fit
        se <- fv$se.fit/fv$fit
        newd$ymax=exp(log(fv$fit)+se)
        newd$ymin=exp(log(fv$fit)-se)
        
        idname=names(id)[1]
        newd[[idname]]=id[[1]][i]
        if(i==1){
            final=newd
        } else{
            final=rbind(final,newd)
        }
    }
    final[[idname]]=factor(final[[idname]])
    final
    ggplot(data=final,aes_string(x="time",y="fit",fill=idname,group=idname))+
        geom_line(aes_string(color=idname))+
        geom_ribbon(aes_string(ymax="ymax",ymin="ymin"),alpha=0.3)+
        ylim(c(0,1)) + ylab("cumulative survival")+xlab("days")+
        theme_bw()+
        theme(legend.position = "top")
}

averageData=function(data,newValue=list()){
   newd=list()    
   for(i in 1:ncol(data)){
       if(is.numeric(data[[i]])) {
           newd[[i]]=mean(data[[i]],na.rm=TRUE)
       } else if(is.factor(data[[i]])){
           newd[[i]]=levels(data[[i]])[1]
       } else{
           newd[[i]]=sort(unique(data[[i]]))[1]
       }
   }
   names(newd)=names(data)
   df=as.data.frame(newd)
   df
   if(length(newValue)>0){
       no=length(newValue[[1]])
       for(i in 1:no){
           if(i==1) {
               final=df
           } else{
               final=rbind(final,df)
           }
       }
       final[[names(newValue)[1]]]=newValue[[1]]
       df=final
   }
   df
}

df=averageData(dd,list(group=c("1","2","3","4")))
drawSurv(b0,data=df,id=list(group=c("1","2","3","4")))

GAM practice

HBH

2020 5 18