1 Description

A sample of 1,760 Australian adolescents whose smoking patterns were recorded every 6 months for 3 years. Parental smoking and gender variables were also recorded. The goal was to examine if the probability of smoking depends on sex, parental smoking and the wave of the study and how the probability of smoking varies from one individual adolescent to another.

Source: Gelman, A., & Hill, J. (2006). Data Analysis Using Regression and Multilevel/Hierarchical Models. pp. 241-243.

Column 1: Subject ID
Column 2: Gender ID, 1 = Female, 0 = Male
Column 3: Indicator variable for parental smoking 1 = Yes, 0 = No
Column 4: Wave of the study
Column 5: Smoking regularly, 1 = Yes, 0 = No

2 load or install packages to be used

pacman::p_load(magrittr, vcrpart, ggplot2, dplyr, tidyr, ltm, eRm, lme4)

3 Input Data

dta1 <- read.table("C:/Users/HANK/Desktop/HOMEWORK/adolescentSmoking.txt", h=T)

names(dta1) <- c("ID","Gender","Parent", "Wave", "Smoking")

head(dta1)
##   ID Gender Parent Wave Smoking
## 1  1      1      0    1       0
## 2  1      1      0    2       0
## 3  1      1      0    4       0
## 4  1      1      0    5       0
## 5  1      1      0    6       0
## 6  2      0      0    1       0
dta1b <- dta1 %>% 
  mutate(ID = factor(ID),
         Gender = factor(Gender,
                         levels=0:1, 
                         labels=c("Male","Female")),
         Parent = factor(Parent,
                         levels=0:1, 
                         labels=c("No","Yes")),
         Wave = factor(Wave,
                       levels=1:6, labels=c(1:6)),
         Smoking = factor(Smoking,
                          levels=0:1, 
                          labels=c("No","Yes")))
str(dta1b)
## 'data.frame':    8730 obs. of  5 variables:
##  $ ID     : Factor w/ 1760 levels "1","2","3","4",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ Gender : Factor w/ 2 levels "Male","Female": 2 2 2 2 2 1 1 1 1 1 ...
##  $ Parent : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Wave   : Factor w/ 6 levels "1","2","3","4",..: 1 2 4 5 6 1 2 3 4 5 ...
##  $ Smoking: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...

4 GLMM

dta1 <- dta1 %>% 
  mutate(ID = factor(ID),
         Smoking = factor(Smoking))

m1 <- olmm(Smoking ~ Parent*Wave + re(1 + Wave | ID), data = dta1, family=cumulative(link="probit"))

summary(m1)$feMatCe[,1]
## [1] 4.35162
summary(m1)$feMatGe
##                Estimate Std. Error   z value
## Parent      -0.99124120 0.29100936 -3.406218
## Wave        -0.03359146 0.10564948 -0.317952
## Parent:Wave -0.14809685 0.06530146 -2.267895
summary(m1)$REmat
## Subject: ID 
##             Variance  StdDev    Corr       
## (Intercept) 6.7083226 2.5900430 (Intercept)
## Wave        0.1123636 0.3352067 0.1091035

5 plot

dta1w <- spread(dta1b, key = "Wave", value = "Smoking")

names(dta1w) <- c("ID","Gender","Parent","Wave1","Wave2", "Wave3", "Wave4", 
                  "Wave5","Wave6")

w1 <- as.numeric(prop.table(ftable(dta1w$Gender, dta1w$Wave1), 1))
w2 <- as.numeric(prop.table(ftable(dta1w$Gender, dta1w$Wave2), 1))
w3 <- as.numeric(prop.table(ftable(dta1w$Gender, dta1w$Wave3), 1))
w4 <- as.numeric(prop.table(ftable(dta1w$Gender, dta1w$Wave4), 1))
w5 <- as.numeric(prop.table(ftable(dta1w$Gender, dta1w$Wave5), 1))
w6 <- as.numeric(prop.table(ftable(dta1w$Gender, dta1w$Wave6), 1))
dta1p <- rbind(w1,w2,w3,w4,w5,w6)
dta1p <- t(dta1p)

clp <- c((dta1p[3, 1:6]),(dta1p[4, 1:6]))
yt <- rep(c(1,2,3,4,5,6),6)
y <- data.frame(wave=yt, clprop=clp)

plot(y[,1], y[,2], type="n",
     xlab="Wave of study", ylab="Proportion of smoker")

lines(dta1p[3, 1:6], lty=1, col="steelblue")
lines(dta1p[4, 1:6], lty=1, col="indianred")
text(6, .12, "Boy", col="steelblue")
text(5, .18, "Girl", col="indianred")
grid()

6 end