data management
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dta4a <- dta4 %>%
mutate(ID = factor(ID),
sex = factor(sex,
levels=0:1,
labels=c("Male","Female")),
parent = factor(parent,
levels=0:1,
labels=c("No","Yes")),
wave = factor(wave,
levels=1:6, labels=c(1:6)),
time = as.numeric(wave),
ctime = time*0.5,
smoke = factor(smoke,
levels=0:1,
labels=c("No","Yes")))
str(dta4a)
## 'data.frame': 8730 obs. of 7 variables:
## $ ID : Factor w/ 1760 levels "1","2","3","4",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ sex : Factor w/ 2 levels "Male","Female": 2 2 2 2 2 1 1 1 1 1 ...
## $ parent: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ wave : Factor w/ 6 levels "1","2","3","4",..: 1 2 4 5 6 1 2 3 4 5 ...
## $ smoke : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ time : num 1 2 4 5 6 1 2 3 4 5 ...
## $ ctime : num 0.5 1 2 2.5 3 0.5 1 1.5 2 2.5 ...
proportion of smoke
dta4p <- prop.table(with(dta4a, ftable(sex,wave, smoke)), 1)
# set the data frame
dta4p1 <- as.data.frame(dta4p)
only keep smoke == 1
dta4p2 <- subset(dta4p1, smoke == "Yes", select = c("sex","wave","Freq"))
plot proportion of smoker
library(ggplot2)
ggplot(dta4p2, aes(wave, Freq, group = sex)) +
geom_point(alpha = .5)+
geom_path(aes(linetype=sex)) +
scale_y_continuous(limits=c(0, 0.2)) +
labs(x = "Wave", y = "Proprtion of smokers")

GLMM
## Loading required package: Matrix
summary(m0 <- glmer(smoke ~ sex + parent + wave + (1 | ID), data = dta4a, family = binomial))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 4.75739 (tol = 0.002, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: smoke ~ sex + parent + wave + (1 | ID)
## Data: dta4a
##
## AIC BIC logLik deviance df.resid
## 3803.4 3867.1 -1892.7 3785.4 8721
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4829 -0.0328 -0.0209 -0.0144 4.2244
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 40.98 6.402
## Number of obs: 8730, groups: ID, 1760
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.0360 0.3549 -22.641 < 2e-16 ***
## sexFemale 0.2196 0.2694 0.815 0.414994
## parentYes 1.1726 0.2761 4.247 2.17e-05 ***
## wave2 -0.8710 0.2454 -3.550 0.000385 ***
## wave3 -0.3562 0.2394 -1.488 0.136722
## wave4 0.1828 0.2352 0.777 0.437006
## wave5 0.6205 0.2339 2.653 0.007969 **
## wave6 1.1000 0.2329 4.722 2.34e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sexFml prntYs wave2 wave3 wave4 wave5
## sexFemale -0.415
## parentYes -0.261 0.033
## wave2 -0.381 -0.009 0.011
## wave3 -0.422 -0.007 0.009 0.642
## wave4 -0.464 -0.008 0.008 0.649 0.667
## wave5 -0.504 -0.007 0.007 0.647 0.670 0.693
## wave6 -0.548 -0.012 0.005 0.639 0.666 0.693 0.711
## convergence code: 0
## Model failed to converge with max|grad| = 4.75739 (tol = 0.002, component 1)
# ctime 結果有點怪
summary(m1 <- glmer(smoke ~ sex + parent + ctime + (1 | ID), data = dta4a, family = binomial))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.281717 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: smoke ~ sex + parent + ctime + (1 | ID)
## Data: dta4a
##
## AIC BIC logLik deviance df.resid
## 3698.7 3734.0 -1844.3 3688.7 8725
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8956 -0.0145 -0.0090 -0.0055 5.4759
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 103.4 10.17
## Number of obs: 8730, groups: ID, 1760
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.205e+01 2.534e-04 -47536 <2e-16 ***
## sexFemale 8.099e-01 2.534e-04 3196 <2e-16 ***
## parentYes 8.593e-01 2.534e-04 3390 <2e-16 ***
## ctime 1.124e+00 2.535e-04 4435 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sexFml prntYs
## sexFemale 0.000
## parentYes 0.000 0.000
## ctime 0.000 0.000 0.000
## convergence code: 0
## Model failed to converge with max|grad| = 0.281717 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?