Data processing

library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(readxl)

dat <- read_excel("~/Desktop/DTR work/fwddatacode/AED Withdrawal Study.xlsx",sheet=3)
dat_baseline <- read_excel("~/Desktop/DTR work/fwddatacode/AED Withdrawal Study.xlsx",sheet=1)


dat$sezind1[dat$sezind1>1]<-1
dat$sezind2[dat$sezind2>1]<-1
dat$sezind3[dat$sezind3>1]<-1
dat$sezind4[dat$sezind4>1]<-1


dat0<- dat_baseline%>%select(patno,drug1,drug2,drug3,dobd,dobm,doby,eeg,neurodef,school,develop,datesez1y,datesez2y,datesezmry,aedstarty)
dat$free_year0 <- dat$Randy - dat0$datesezmry
dat$free_year1 <- (( (1-dat$sezind1)*1000 + dat$free_year0+1 ) - abs( (1-dat$sezind1)*1000 - dat$free_year0-1 ))/2
dat$free_year2 <- (( (1-dat$sezind2)*1000 + dat$free_year1+1 ) - abs( (1-dat$sezind2)*1000 - dat$free_year1-1 ))/2
dat$free_year3 <- (( (1-dat$sezind3)*1000 + dat$free_year2+1 ) - abs( (1-dat$sezind3)*1000 - dat$free_year2-1 ))/2
dat$free_year4 <- (( (1-dat$sezind4)*1000 + dat$free_year3+1 ) - abs( (1-dat$sezind4)*1000 - dat$free_year3-1 ))/2

dat$age <- dat$Randy - dat0$datesezmry

dat$free_year0 <- (dat$free_year0>=1) +(dat$free_year0>=2) + (dat$free_year0>=5)
dat$free_year1 <- (dat$free_year1>=1) +(dat$free_year1>=2) + (dat$free_year1>=5)
dat$free_year2 <- (dat$free_year2>=1) +(dat$free_year2>=2) + (dat$free_year2>=5)
dat$free_year3 <- (dat$free_year3>=1) +(dat$free_year3>=2) + (dat$free_year3>=5)
dat$free_year4 <- (dat$free_year4>=1) +(dat$free_year4>=2) + (dat$free_year4>=5)

# dat$free_year0[dat$free_year0>5]<-6
# dat$free_year1[dat$free_year1>5]<-6
# dat$free_year2[dat$free_year2>5]<-6
# dat$free_year3[dat$free_year3>5]<-6
# dat$free_year4[dat$free_year4>5]<-6

# table(dat$free_year0)
# table(dat$free_year1)
# table(dat$free_year2)
# table(dat$free_year3)
# table(dat$free_year4)



result<- data.frame(id = dat$patno, A = dat$treatgp)
n<-length(dat$patno)
result$drug1_stage_1<-dat$drug11
result$drug1_stage_2<-dat$drug12
result$drug1_stage_3<-dat$drug13
result$drug1_stage_4<-dat$drug14
result$drug1_stage_5<-dat$drug15
result$drug1_stage_6<-dat$drug1F
result$drug1_stage_7<-dat$drug1G

result$drug2_stage_1<-dat$drug21
result$drug2_stage_2<-dat$drug22
result$drug2_stage_3<-dat$drug23
result$drug2_stage_4<-dat$drug24
result$drug2_stage_5<-dat$drug25
result$drug2_stage_6<-dat$drug2F
result$drug2_stage_7<-dat$drug2G

result$drug_num_stage1 <- as.numeric(!is.na( as.numeric(dat$drug11) )) + as.numeric(!is.na( as.numeric(dat$drug21) ))
result$drug_num_stage2 <- as.numeric(!is.na( as.numeric(dat$drug12) )) + as.numeric(!is.na( as.numeric(dat$drug22) ))
result$drug_num_stage3 <- as.numeric(!is.na( as.numeric(dat$drug13) )) + as.numeric(!is.na( as.numeric(dat$drug23) ))
result$drug_num_stage4 <- as.numeric(!is.na( as.numeric(dat$drug14) )) + as.numeric(!is.na( as.numeric(dat$drug24) ))
result$drug_num_stage5 <- as.numeric(!is.na( as.numeric(dat$drug15) )) + as.numeric(!is.na( as.numeric(dat$drug25) ))
result$drug_num_stage6 <- as.numeric(!is.na( as.numeric(dat$drug1F) )) + as.numeric(!is.na( as.numeric(dat$drug2F) ))
result$drug_num_stage7 <- as.numeric(!is.na( as.numeric(dat$drug1G) )) + as.numeric(!is.na( as.numeric(dat$drug2G) ))

result$ddd1  <- as.numeric(dat$dosed11) +  as.numeric(dat$dosed21)
result$ddd2  <- as.numeric(dat$dosed12) +  as.numeric(dat$dosed22)
result$ddd3  <- as.numeric(dat$dosed13) +  as.numeric(dat$dosed23)
result$ddd4  <- as.numeric(dat$dosed14) +  as.numeric(dat$dosed24)

dat$dosed11<- as.numeric(dat$dosed11);dat$dosed11[is.na(dat$dosed11)]<-0
dat$dosed21<- as.numeric(dat$dosed21);dat$dosed21[is.na(dat$dosed21)]<-0
dat$dosed12<- as.numeric(dat$dosed12);dat$dosed12[is.na(dat$dosed12)]<-0
dat$dosed22<- as.numeric(dat$dosed22);dat$dosed22[is.na(dat$dosed22)]<-0
dat$dosed13<- as.numeric(dat$dosed13);dat$dosed13[is.na(dat$dosed13)]<-0
dat$dosed23<- as.numeric(dat$dosed23);dat$dosed23[is.na(dat$dosed23)]<-0
dat$dosed14<- as.numeric(dat$dosed14);dat$dosed14[is.na(dat$dosed14)]<-0
dat$dosed24<- as.numeric(dat$dosed24);dat$dosed24[is.na(dat$dosed24)]<-0

result$ddd1  <-
  (as.numeric(dat$drug11==1) *dat$dosed11+(dat$drug21==1) *dat$dosed21)/1500+ 
  (as.numeric(dat$drug11==2) *dat$dosed11+(dat$drug21==2) *dat$dosed21)/100+ 
  (as.numeric(dat$drug11==3) *dat$dosed11+(dat$drug21==3) *dat$dosed21)/300+ 
  (as.numeric(dat$drug11==4) *dat$dosed11+(dat$drug21==4) *dat$dosed21)/1000+ 
  (as.numeric(dat$drug11==5) *dat$dosed11+(dat$drug21==5) *dat$dosed21)/1250+ 
  (as.numeric(dat$drug11==6) *dat$dosed11+(dat$drug21==6) *dat$dosed21)/1250+ 
  (as.numeric(dat$drug11==7) *dat$dosed11+(dat$drug21==7) *dat$dosed21)/10+ 
  (as.numeric(dat$drug11==8) *dat$dosed11+(dat$drug21==8) *dat$dosed21)/20+ 
  (as.numeric(dat$drug11==9) *dat$dosed11+(dat$drug21==9) *dat$dosed21)/8+ 
  (as.numeric(dat$drug11==10)*dat$dosed11+(dat$drug21==10)*dat$dosed21)/1500+ 
  (as.numeric(dat$drug11==11)*dat$dosed11+(dat$drug21==11)*dat$dosed21)/900+ 
  (as.numeric(dat$drug11==14)*dat$dosed11+(dat$drug21==14)*dat$dosed21)/400+ 
  (as.numeric(dat$drug11==15)*dat$dosed11+(dat$drug21==15)*dat$dosed21)/500+ 
  (as.numeric(dat$drug11==16)*dat$dosed11+(dat$drug21==16)*dat$dosed21)/2500+ 
  (as.numeric(dat$drug11==17)*dat$dosed11+(dat$drug21==17)*dat$dosed21)/3000+ 
  (as.numeric(dat$drug11==18)*dat$dosed11+(dat$drug21==18)*dat$dosed21)/5+ 
  (as.numeric(dat$drug11==19)*dat$dosed11+(dat$drug21==19)*dat$dosed21)/2000

result$ddd2  <-
  (as.numeric(dat$drug12==1) *dat$dosed12+(dat$drug22==1) *dat$dosed22)/1500+ 
  (as.numeric(dat$drug12==2) *dat$dosed12+(dat$drug22==2) *dat$dosed22)/100+ 
  (as.numeric(dat$drug12==3) *dat$dosed12+(dat$drug22==3) *dat$dosed22)/300+ 
  (as.numeric(dat$drug12==4) *dat$dosed12+(dat$drug22==4) *dat$dosed22)/1000+ 
  (as.numeric(dat$drug12==5) *dat$dosed12+(dat$drug22==5) *dat$dosed22)/1250+ 
  (as.numeric(dat$drug12==6) *dat$dosed12+(dat$drug22==6) *dat$dosed22)/1250+ 
  (as.numeric(dat$drug12==7) *dat$dosed12+(dat$drug22==7) *dat$dosed22)/10+ 
  (as.numeric(dat$drug12==8) *dat$dosed12+(dat$drug22==8) *dat$dosed22)/20+ 
  (as.numeric(dat$drug12==9) *dat$dosed12+(dat$drug22==9) *dat$dosed22)/8+ 
  (as.numeric(dat$drug12==10)*dat$dosed12+(dat$drug22==10)*dat$dosed22)/1500+ 
  (as.numeric(dat$drug12==11)*dat$dosed12+(dat$drug22==11)*dat$dosed22)/900+ 
  (as.numeric(dat$drug12==14)*dat$dosed12+(dat$drug22==14)*dat$dosed22)/400+ 
  (as.numeric(dat$drug12==15)*dat$dosed12+(dat$drug22==15)*dat$dosed22)/500+ 
  (as.numeric(dat$drug12==16)*dat$dosed12+(dat$drug22==16)*dat$dosed22)/2500+ 
  (as.numeric(dat$drug12==17)*dat$dosed12+(dat$drug22==17)*dat$dosed22)/3000+ 
  (as.numeric(dat$drug12==18)*dat$dosed12+(dat$drug22==18)*dat$dosed22)/5+ 
  (as.numeric(dat$drug12==19)*dat$dosed12+(dat$drug22==19)*dat$dosed22)/2000

result$ddd3  <-
  (as.numeric(dat$drug13==1) *dat$dosed13+(dat$drug23==1) *dat$dosed23)/1500+ 
  (as.numeric(dat$drug13==2) *dat$dosed13+(dat$drug23==2) *dat$dosed23)/100+ 
  (as.numeric(dat$drug13==3) *dat$dosed13+(dat$drug23==3) *dat$dosed23)/300+ 
  (as.numeric(dat$drug13==4) *dat$dosed13+(dat$drug23==4) *dat$dosed23)/1000+ 
  (as.numeric(dat$drug13==5) *dat$dosed13+(dat$drug23==5) *dat$dosed23)/1250+ 
  (as.numeric(dat$drug13==6) *dat$dosed13+(dat$drug23==6) *dat$dosed23)/1250+ 
  (as.numeric(dat$drug13==7) *dat$dosed13+(dat$drug23==7) *dat$dosed23)/10+ 
  (as.numeric(dat$drug13==8) *dat$dosed13+(dat$drug23==8) *dat$dosed23)/20+ 
  (as.numeric(dat$drug13==9) *dat$dosed13+(dat$drug23==9) *dat$dosed23)/8+ 
  (as.numeric(dat$drug13==10)*dat$dosed13+(dat$drug23==10)*dat$dosed23)/1500+ 
  (as.numeric(dat$drug13==11)*dat$dosed13+(dat$drug23==11)*dat$dosed23)/900+ 
  (as.numeric(dat$drug13==14)*dat$dosed13+(dat$drug23==14)*dat$dosed23)/400+ 
  (as.numeric(dat$drug13==15)*dat$dosed13+(dat$drug23==15)*dat$dosed23)/500+ 
  (as.numeric(dat$drug13==16)*dat$dosed13+(dat$drug23==16)*dat$dosed23)/2500+ 
  (as.numeric(dat$drug13==17)*dat$dosed13+(dat$drug23==17)*dat$dosed23)/3000+ 
  (as.numeric(dat$drug13==18)*dat$dosed13+(dat$drug23==18)*dat$dosed23)/5+ 
  (as.numeric(dat$drug13==19)*dat$dosed13+(dat$drug23==19)*dat$dosed23)/2000

result$ddd4  <-
  (as.numeric(dat$drug14==1) *dat$dosed14+(dat$drug24==1) *dat$dosed24)/1500+ 
  (as.numeric(dat$drug14==2) *dat$dosed14+(dat$drug24==2) *dat$dosed24)/100+ 
  (as.numeric(dat$drug14==3) *dat$dosed14+(dat$drug24==3) *dat$dosed24)/300+ 
  (as.numeric(dat$drug14==4) *dat$dosed14+(dat$drug24==4) *dat$dosed24)/1000+ 
  (as.numeric(dat$drug14==5) *dat$dosed14+(dat$drug24==5) *dat$dosed24)/1250+ 
  (as.numeric(dat$drug14==6) *dat$dosed14+(dat$drug24==6) *dat$dosed24)/1250+ 
  (as.numeric(dat$drug14==7) *dat$dosed14+(dat$drug24==7) *dat$dosed24)/10+ 
  (as.numeric(dat$drug14==8) *dat$dosed14+(dat$drug24==8) *dat$dosed24)/20+ 
  (as.numeric(dat$drug14==9) *dat$dosed14+(dat$drug24==9) *dat$dosed24)/8+ 
  (as.numeric(dat$drug14==10)*dat$dosed14+(dat$drug24==10)*dat$dosed24)/1500+ 
  (as.numeric(dat$drug14==11)*dat$dosed14+(dat$drug24==11)*dat$dosed24)/900+ 
  (as.numeric(dat$drug14==14)*dat$dosed14+(dat$drug24==14)*dat$dosed24)/400+ 
  (as.numeric(dat$drug14==15)*dat$dosed14+(dat$drug24==15)*dat$dosed24)/500+ 
  (as.numeric(dat$drug14==16)*dat$dosed14+(dat$drug24==16)*dat$dosed24)/2500+ 
  (as.numeric(dat$drug14==17)*dat$dosed14+(dat$drug24==17)*dat$dosed24)/3000+ 
  (as.numeric(dat$drug14==18)*dat$dosed14+(dat$drug24==18)*dat$dosed24)/5+ 
  (as.numeric(dat$drug14==19)*dat$dosed14+(dat$drug24==19)*dat$dosed24)/2000


result$sez1 <- as.numeric(!is.na( as.numeric(dat$sezdatea1y) )) + as.numeric(!is.na( as.numeric(dat$sezdatea2y) ))
result$sez2 <- as.numeric(!is.na( as.numeric(dat$sezdatea2y) )) + as.numeric(!is.na( as.numeric(dat$sezdatea2y) ))
result$sez3 <- as.numeric(!is.na( as.numeric(dat$sezdatea3y) )) + as.numeric(!is.na( as.numeric(dat$sezdatea2y) ))
result$sez4 <- as.numeric(!is.na( as.numeric(dat$sezdatea4y) )) + as.numeric(!is.na( as.numeric(dat$sezdatea2y) ))

result$drug_num_summary <- 0
result$drug_num_summary4 <- 0
for (ii in 1:n) {
  result$drug_num_summary[ii] <- paste(c(result$drug_num_stage1[ii],result$drug_num_stage2[ii],result$drug_num_stage3[ii],
                                         result$drug_num_stage4[ii],result$drug_num_stage5[ii],result$drug_num_stage6[ii],
                                         result$drug_num_stage7[ii]),collapse = "")
  result$drug_num_summary4[ii] <- paste(c(result$A[ii],"-", result$drug_num_stage1[ii],result$drug_num_stage2[ii],result$drug_num_stage3[ii],
                                          result$drug_num_stage4[ii]),collapse = "")
}

dat_baseline$school[dat_baseline$school==8]<-1

result$free_year1 <- dat$free_year1
result$free_year2 <- dat$free_year2
result$free_year3 <- dat$free_year3
result$free_year4 <- dat$free_year4
result$age <- dat$age

result$develop <- as.factor(dat_baseline$develop)
result$school <- as.factor(dat_baseline$school)
result$neurodef<-as.factor(dat_baseline$neurodef)
result$btrauma<-as.factor(dat_baseline$btrauma)
result$headinj<-as.factor(dat_baseline$headinj)

res1<-data.frame(id=result$id,stage=1,ddd=result$ddd1,Y=result$sez1,freeyear = result$free_year1)
res2<-data.frame(id=result$id,stage=2,ddd=result$ddd2,Y=result$sez2,freeyear = result$free_year2)
res3<-data.frame(id=result$id,stage=3,ddd=result$ddd3,Y=result$sez3,freeyear = result$free_year3)
res4<-data.frame(id=result$id,stage=4,ddd=result$ddd4,Y=result$sez4,freeyear = result$free_year4)
res_base<-result%>%select(id,develop,school,neurodef,btrauma,headinj,A,age)

D<-res1
D<-rbind(D,res2)
D<-rbind(D,res3)
D<-rbind(D,res4)
D<-left_join(D,res_base,by="id")

### truncated
plot(density(D$ddd))

D$ddd[D$ddd>1]<-1


### categorize the age
table(D$age)
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
##    8 1128  972  532  296  264  144  172   80   64   76   64   16   24   20   32 
##   17   18   19   20   21   22   23   24   25   26   27   28   29   30   32   35 
##   24   20    8   24    8    8    8   16    8    4    4    4    4    8    4    4 
##   43 
##    4
D$age_c<- (D$age>=0)+(D$age>=7)+(D$age>=18)
D$age_c<-as.factor(D$age_c)
table(D$age_c)
## 
##    1    2    3 
## 3200  716  136
D$id<-as.numeric(as.factor(D$id))
D<-D[order(D$id),]
# D<-D[D$stage!=1,]
D$A<-as.factor(D$A)

Fixed effect regression

model3<-glm( Y ~ A  + as.factor(freeyear) + ddd + I((ddd-0.33)^2) + I((ddd-0.33)^3) +
               age_c + develop+school+neurodef+btrauma+headinj , data = D, family = poisson, na.action = na.omit)

model2<-glm( Y ~ A  + as.factor(freeyear) + ddd + I((ddd-0.33)^2)  +
               age_c + develop+school+neurodef+btrauma+headinj , data = D, family = poisson, na.action = na.omit)

model1<-glm( Y ~ A  + as.factor(freeyear) + ddd   +
               age_c + develop+school+neurodef+btrauma+headinj , data = D, family = poisson, na.action = na.omit)

anova(model1,model2,model3)
## Analysis of Deviance Table
## 
## Model 1: Y ~ A + as.factor(freeyear) + ddd + age_c + develop + school + 
##     neurodef + btrauma + headinj
## Model 2: Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + age_c + 
##     develop + school + neurodef + btrauma + headinj
## Model 3: Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + I((ddd - 
##     0.33)^3) + age_c + develop + school + neurodef + btrauma + 
##     headinj
##   Resid. Df Resid. Dev Df Deviance
## 1      4039     1088.1            
## 2      4038     1087.8  1 0.310528
## 3      4037     1087.8  1 0.013147
summary(model2)
## 
## Call:
## glm(formula = Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + 
##     age_c + develop + school + neurodef + btrauma + headinj, 
##     family = poisson, data = D, na.action = na.omit)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2205  -0.2658  -0.1927  -0.1737   2.4865  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.184131   0.093933   1.960   0.0500 *  
## A2                    0.210212   0.073819   2.848   0.0044 ** 
## as.factor(freeyear)1 -1.119525   0.102538 -10.918   <2e-16 ***
## as.factor(freeyear)2 -2.225114   0.098882 -22.503   <2e-16 ***
## as.factor(freeyear)3 -4.361716   0.158075 -27.593   <2e-16 ***
## ddd                   0.235175   0.137876   1.706   0.0881 .  
## I((ddd - 0.33)^2)    -0.173927   0.311989  -0.557   0.5772    
## age_c2                0.062842   0.097356   0.645   0.5186    
## age_c3                0.273389   0.238604   1.146   0.2519    
## develop2             -0.105787   0.156269  -0.677   0.4984    
## school2               0.246862   0.144545   1.708   0.0877 .  
## neurodef2            -0.066708   0.135852  -0.491   0.6234    
## btrauma2             -0.173013   0.133966  -1.291   0.1965    
## headinj2              0.004046   0.188115   0.022   0.9828    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3403.0  on 4051  degrees of freedom
## Residual deviance: 1087.8  on 4038  degrees of freedom
## AIC: 2680
## 
## Number of Fisher Scoring iterations: 6
x<-(1:1000)/1000
y<- sapply(x, function(x) 0.235*x - (x-0.33)^2*0.174 +0.33^2*0.174 )
plot(x,y,type = "l")

Zero inflated?

Because about 87% records have no seizure occurrence, the data is zero inflated. Logistic regression fails to converge. So poisson model is used here.

I tried zero inflated model , but they can’t converge either. Besides, it can’t include random effects.

Generalized linear mixed model + poisson family

### GLMM
library(lme4)
## Warning: package 'lme4' was built under R version 3.6.2
## Loading required package: Matrix
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
model0<-glmer( Y~ A  + as.factor(freeyear) + ddd + I((ddd-0.33)^2) +
                 age_c + develop+school+neurodef+btrauma+headinj +(1|id) ,
               data = D, family = poisson, na.action = na.omit)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00347778 (tol = 0.002, component 1)
summary(model0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + age_c +  
##     develop + school + neurodef + btrauma + headinj + (1 | id)
##    Data: D
## 
##      AIC      BIC   logLik deviance df.resid 
##   2678.1   2772.7  -1324.0   2648.1     4037 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7954 -0.1661 -0.1342 -0.1195  7.0373 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  id     (Intercept) 0.07413  0.2723  
## Number of obs: 4052, groups:  id, 1013
## 
## Fixed effects:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.0746294  0.1168528   0.639  0.52304    
## A2                    0.2422995  0.0824105   2.940  0.00328 ** 
## as.factor(freeyear)1 -1.1119940  0.1030550 -10.790  < 2e-16 ***
## as.factor(freeyear)2 -2.1692410  0.1044937 -20.760  < 2e-16 ***
## as.factor(freeyear)3 -4.3045428  0.1626880 -26.459  < 2e-16 ***
## ddd                   0.2241782  0.1445390   1.551  0.12090    
## I((ddd - 0.33)^2)    -0.1341781  0.3313391  -0.405  0.68551    
## age_c2                0.0751394  0.1080249   0.696  0.48670    
## age_c3                0.2925084  0.2641400   1.107  0.26812    
## develop2             -0.0967025  0.1724804  -0.561  0.57503    
## school2               0.2580840  0.1605013   1.608  0.10784    
## neurodef2            -0.0807188  0.1490424  -0.542  0.58811    
## btrauma2             -0.1695898  0.1467483  -1.156  0.24782    
## headinj2             -0.0009563  0.2082144  -0.005  0.99634    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 14 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00347778 (tol = 0.002, component 1)
x<-(1:1000)/1000
y<- sapply(x, function(x) 0.22*x - (x-0.33)^2*0.134  )
plot(x,y)

Marginal model + GEE + symmetric (any two stages’ correlation is the same)

###  GEE symmetry 
library(gee)

model2<-gee( Y~ A  + as.factor(freeyear) + ddd + I((ddd-0.33)^2) + I((ddd-0.33)^3) +  
               age_c + develop+school+neurodef+btrauma+headinj, data=D, id = D$id, 
             family=poisson("log"), corstr="exchangeable",maxiter = 400)
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##          (Intercept)                   A2 as.factor(freeyear)1 
##          0.196176101          0.210701676         -1.118340155 
## as.factor(freeyear)2 as.factor(freeyear)3                  ddd 
##         -2.224562866         -4.360383049          0.213061225 
##    I((ddd - 0.33)^2)    I((ddd - 0.33)^3)               age_c2 
##         -0.256327443          0.163039662          0.062862037 
##               age_c3             develop2              school2 
##          0.273473714         -0.105707779          0.246065731 
##            neurodef2             btrauma2             headinj2 
##         -0.066645357         -0.173137988          0.003299694
## Warning in gee(Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + I((ddd
## - : Maximum number of iterations consumed
## Warning in gee(Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + I((ddd
## - : Convergence not achieved; results suspect
## Warning in gee(Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + I((ddd
## - : Cgee had an error (code= 104). Results suspect.
summary(model2)
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logarithm 
##  Variance to Mean Relation: Poisson 
##  Correlation Structure:     Exchangeable 
## 
## Call:
## gee(formula = Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + 
##     I((ddd - 0.33)^3) + age_c + develop + school + neurodef + 
##     btrauma + headinj, id = D$id, data = D, maxiter = 400, family = poisson("log"), 
##     corstr = "exchangeable")
## 
## Summary of Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.17534575 -0.11889794 -0.11258546 -0.09438353  1.41653211 
## 
## 
## Coefficients:
##                         Estimate Naive S.E.     Naive z Robust S.E.
## (Intercept)          -0.32925896 0.07755730  -4.2453636   0.1446852
## A2                    0.01026090 0.06221172   0.1649352   0.1466828
## as.factor(freeyear)1 -1.92245294 0.07569482 -25.3974177   0.1661907
## as.factor(freeyear)2 -1.74350339 0.05265737 -33.1103375   0.1016499
## as.factor(freeyear)3 -1.90698899 0.05873589 -32.4671853   0.1084582
## ddd                  -0.02299552 0.10682642  -0.2152606   0.1899801
## I((ddd - 0.33)^2)     0.35438820 0.32047183   1.1058326   0.5485767
## I((ddd - 0.33)^3)    -0.52356711 0.61101155  -0.8568858   1.0473925
## age_c2               -0.17028912 0.08797450  -1.9356646   0.2273436
## age_c3               -0.12324234 0.22722721  -0.5423749   0.5165951
## develop2              0.10810878 0.13462552   0.8030333   0.3072725
## school2               0.04934981 0.12822950   0.3848554   0.2993863
## neurodef2            -0.18812662 0.12187723  -1.5435748   0.2775952
## btrauma2              0.15598880 0.10710509   1.4564087   0.2530158
## headinj2              0.18093554 0.16265997   1.1123544   0.3710800
##                          Robust z
## (Intercept)           -2.27569139
## A2                     0.06995301
## as.factor(freeyear)1 -11.56775544
## as.factor(freeyear)2 -17.15203438
## as.factor(freeyear)3 -17.58270266
## ddd                   -0.12104176
## I((ddd - 0.33)^2)      0.64601391
## I((ddd - 0.33)^3)     -0.49987672
## age_c2                -0.74903846
## age_c3                -0.23856661
## develop2               0.35183359
## school2                0.16483658
## neurodef2             -0.67770135
## btrauma2               0.61651789
## headinj2               0.48759168
## 
## Estimated Scale Parameter:  0.7200767
## Number of Iterations:  400
## 
## Working Correlation
##           [,1]      [,2]      [,3]      [,4]
## [1,] 1.0000000 0.9132023 0.9132023 0.9132023
## [2,] 0.9132023 1.0000000 0.9132023 0.9132023
## [3,] 0.9132023 0.9132023 1.0000000 0.9132023
## [4,] 0.9132023 0.9132023 0.9132023 1.0000000
x<-(1:1000)/1000
y<- sapply(x, function(x) -0.1017*x + (x-0.33)^2*0.1019  )
plot(x,y,main="quadratic spline model")

x<-(1:1000)/1000
y<- sapply(x, function(x) -0.023*x + (x-0.33)^2*0.3544- (x-0.33)^3*0.5236  )
plot(x,y,main="cubic spline model")

# model2<-gee( Y~ A  + as.factor(freeyear) + ddd + I((ddd-0.35)*(ddd>0.35)) + 
#                age_c + develop+school+neurodef+btrauma+headinj, data=D, id = D$id, 
#              family=poisson("log"), corstr="exchangeable",maxiter = 400)
# summary(model2)

x<-(1:1000)/1000
y<- sapply(x, function(x) -0.177*x + (x-0.35)*(x>0.35)*0.1687  )
plot(x,y,main="bracket line model")

Marginal model + GEE + AR1 ( different stages’ correlation depends on time )

###  GEE AR1

# model2<-gee( Y~ A  + as.factor(freeyear) + ddd  + I((ddd-0.35)*(ddd>0.35))+
#                age_c + develop+school+neurodef+btrauma+headinj,
#              data=D, id = D$id, family=poisson("log"), corstr="AR-M",maxiter = 400)
# summary(model2)


model2<-gee( Y~ A  + as.factor(freeyear) + ddd + I((ddd-0.33)^2) +I((ddd-0.33)^3) + 
               age_c + develop+school+neurodef+btrauma+headinj,
             data=D, id = D$id, family=poisson("log"), corstr="AR-M",maxiter = 400)
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##          (Intercept)                   A2 as.factor(freeyear)1 
##          0.196176101          0.210701676         -1.118340155 
## as.factor(freeyear)2 as.factor(freeyear)3                  ddd 
##         -2.224562866         -4.360383049          0.213061225 
##    I((ddd - 0.33)^2)    I((ddd - 0.33)^3)               age_c2 
##         -0.256327443          0.163039662          0.062862037 
##               age_c3             develop2              school2 
##          0.273473714         -0.105707779          0.246065731 
##            neurodef2             btrauma2             headinj2 
##         -0.066645357         -0.173137988          0.003299694
## Warning in gee(Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + I((ddd
## - : Maximum number of iterations consumed
## Warning in gee(Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + I((ddd
## - : Convergence not achieved; results suspect
## Warning in gee(Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + I((ddd
## - : Cgee had an error (code= 104). Results suspect.
summary(model2)
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logarithm 
##  Variance to Mean Relation: Poisson 
##  Correlation Structure:     AR-M , M = 1 
## 
## Call:
## gee(formula = Y ~ A + as.factor(freeyear) + ddd + I((ddd - 0.33)^2) + 
##     I((ddd - 0.33)^3) + age_c + develop + school + neurodef + 
##     btrauma + headinj, id = D$id, data = D, maxiter = 400, family = poisson("log"), 
##     corstr = "AR-M")
## 
## Summary of Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.22987087 -0.11959018 -0.10207446 -0.08999848  1.45358403 
## 
## 
## Coefficients:
##                         Estimate Naive S.E.     Naive z Robust S.E.    Robust z
## (Intercept)          -0.41126441 0.09257641  -4.4424319  0.13436008  -3.0609123
## A2                    0.10903360 0.07123634   1.5305897  0.10538014   1.0346693
## as.factor(freeyear)1 -1.76719014 0.07560479 -23.3740512  0.15229997 -11.6033518
## as.factor(freeyear)2 -1.49880061 0.05200031 -28.8229171  0.08775363 -17.0796414
## as.factor(freeyear)3 -1.90761523 0.06267418 -30.4370181  0.10378884 -18.3797727
## ddd                   0.07759999 0.13870728   0.5594514  0.19578702   0.3963490
## I((ddd - 0.33)^2)     0.13351695 0.41969590   0.3181278  0.57400085   0.2326076
## I((ddd - 0.33)^3)    -0.33484456 0.79842909  -0.4193792  1.07477408  -0.3115488
## age_c2               -0.16475611 0.10095727  -1.6319391  0.16516970  -0.9974960
## age_c3               -0.22468971 0.26114418  -0.8604048  0.42459384  -0.5291874
## develop2              0.06246340 0.15366737   0.4064844  0.21906233   0.2851398
## school2               0.04357284 0.14536843   0.2997407  0.21486068   0.2027958
## neurodef2            -0.10109962 0.13455143  -0.7513827  0.19718907  -0.5127040
## btrauma2              0.16276119 0.12671448   1.2844719  0.17885824   0.9100011
## headinj2              0.28670193 0.19001154   1.5088659  0.16241991   1.7651896
## 
## Estimated Scale Parameter:  0.6292758
## Number of Iterations:  400
## 
## Working Correlation
##           [,1]     [,2]     [,3]      [,4]
## [1,] 1.0000000 0.851326 0.724756 0.6170037
## [2,] 0.8513260 1.000000 0.851326 0.7247560
## [3,] 0.7247560 0.851326 1.000000 0.8513260
## [4,] 0.6170037 0.724756 0.851326 1.0000000
x<-(1:1000)/1000
y<- sapply(x, function(x) 0.0277*x - (x-0.33)^2*0.0273  )
plot(x,y,type="l",main="quadratic spline model")

x<-(1:1000)/1000
y<- sapply(x, function(x) 0.0776*x + (x-0.33)^2*0.1335- (x-0.33)^3*0.3348  )
plot(x,y,main="cubic spline model")

# AR1 poisson: 0.033 -0.037 
# exchangeable