1 data input

dta4 <- read.table("adolescentSmoking.txt", h=T)
names(dta4) <- c("ID","sex","parent","wave","smoke")
str(dta4)
## 'data.frame':    8730 obs. of  5 variables:
##  $ ID    : int  1 1 1 1 1 2 2 2 2 2 ...
##  $ sex   : int  1 1 1 1 1 0 0 0 0 0 ...
##  $ parent: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ wave  : int  1 2 4 5 6 1 2 3 4 5 ...
##  $ smoke : int  0 0 0 0 0 0 0 0 0 0 ...
head(dta4)
##   ID sex parent wave smoke
## 1  1   1      0    1     0
## 2  1   1      0    2     0
## 3  1   1      0    4     0
## 4  1   1      0    5     0
## 5  1   1      0    6     0
## 6  2   0      0    1     0

2 data management

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
dta4a <- dta4 %>% 
   mutate(ID = factor(ID),
         sex = factor(sex,
                         levels=0:1, 
                         labels=c("Male","Female")),
         parent = factor(parent,
                         levels=0:1, 
                         labels=c("No","Yes")),
         wave = factor(wave,
                       levels=1:6, labels=c(1:6)),
         time = as.numeric(wave),
         ctime = time*0.5,
         smoke = factor(smoke,
                          levels=0:1, 
                          labels=c("No","Yes")))
str(dta4a)
## 'data.frame':    8730 obs. of  7 variables:
##  $ ID    : Factor w/ 1760 levels "1","2","3","4",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ sex   : Factor w/ 2 levels "Male","Female": 2 2 2 2 2 1 1 1 1 1 ...
##  $ parent: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ wave  : Factor w/ 6 levels "1","2","3","4",..: 1 2 4 5 6 1 2 3 4 5 ...
##  $ smoke : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ time  : num  1 2 4 5 6 1 2 3 4 5 ...
##  $ ctime : num  0.5 1 2 2.5 3 0.5 1 1.5 2 2.5 ...

3 proportion of smoke

dta4p <- prop.table(with(dta4a, ftable(sex,wave, smoke)), 1)
# set the data frame 
dta4p1 <- as.data.frame(dta4p)

4 only keep smoke == 1

dta4p2 <- subset(dta4p1, smoke == "Yes", select = c("sex","wave","Freq"))

5 plot proportion of smoker

library(ggplot2)
ggplot(dta4p2, aes(wave, Freq, group = sex)) +
  geom_point(alpha = .5)+
  geom_path(aes(linetype=sex)) +
  scale_y_continuous(limits=c(0, 0.2)) +
  labs(x = "Wave", y = "Proprtion of smokers")

6 GLMM

library(lme4)
## Loading required package: Matrix
summary(m0 <- glmer(smoke ~ sex + parent + wave + (1 | ID), data = dta4a, family = binomial))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 4.75739 (tol = 0.002, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: smoke ~ sex + parent + wave + (1 | ID)
##    Data: dta4a
## 
##      AIC      BIC   logLik deviance df.resid 
##   3803.4   3867.1  -1892.7   3785.4     8721 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4829 -0.0328 -0.0209 -0.0144  4.2244 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 40.98    6.402   
## Number of obs: 8730, groups:  ID, 1760
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -8.0360     0.3549 -22.641  < 2e-16 ***
## sexFemale     0.2196     0.2694   0.815 0.414994    
## parentYes     1.1726     0.2761   4.247 2.17e-05 ***
## wave2        -0.8710     0.2454  -3.550 0.000385 ***
## wave3        -0.3562     0.2394  -1.488 0.136722    
## wave4         0.1828     0.2352   0.777 0.437006    
## wave5         0.6205     0.2339   2.653 0.007969 ** 
## wave6         1.1000     0.2329   4.722 2.34e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr) sexFml prntYs wave2  wave3  wave4  wave5 
## sexFemale -0.415                                          
## parentYes -0.261  0.033                                   
## wave2     -0.381 -0.009  0.011                            
## wave3     -0.422 -0.007  0.009  0.642                     
## wave4     -0.464 -0.008  0.008  0.649  0.667              
## wave5     -0.504 -0.007  0.007  0.647  0.670  0.693       
## wave6     -0.548 -0.012  0.005  0.639  0.666  0.693  0.711
## convergence code: 0
## Model failed to converge with max|grad| = 4.75739 (tol = 0.002, component 1)
# ctime 結果有點怪
summary(m1 <- glmer(smoke ~ sex + parent + ctime + (1 | ID), data = dta4a, family = binomial))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.281717 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: smoke ~ sex + parent + ctime + (1 | ID)
##    Data: dta4a
## 
##      AIC      BIC   logLik deviance df.resid 
##   3698.7   3734.0  -1844.3   3688.7     8725 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8956 -0.0145 -0.0090 -0.0055  5.4759 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 103.4    10.17   
## Number of obs: 8730, groups:  ID, 1760
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.205e+01  2.534e-04  -47536   <2e-16 ***
## sexFemale    8.099e-01  2.534e-04    3196   <2e-16 ***
## parentYes    8.593e-01  2.534e-04    3390   <2e-16 ***
## ctime        1.124e+00  2.535e-04    4435   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr) sexFml prntYs
## sexFemale 0.000               
## parentYes 0.000  0.000        
## ctime     0.000  0.000  0.000 
## convergence code: 0
## Model failed to converge with max|grad| = 0.281717 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?