The data are the responses of a sample of 150 individuals to 9 multiple-choice items from a General Certificate of Education O-level mathematics paper containing 60 items in all. The responses are coded 1 for correct answer and 0 for incorrect. The items appear to test ability in two-dimensional Euclidean geometry. The data have one column with 1,350 records ordered by item within individuals. There are no missing data.
Source: Goldstein, H., & Wood, R. (1989). Five decades of item response modelling, British Journal of Mathematical and Statistical Psychology, 42, 139-67. - Column 1: Subject ID - Column 2: Item ID - Column 3: Answer, 1 = Correct, 0 = Incorrect
pacman::p_load(tidyverse, ggplot2,ltm, eRm, lme4)
dta1<-read.table("starter.txt", h=T)
names(dta1)<-c("ID", "Item", "Resp")
head(dta1)
## ID Item Resp
## 1 1 1 1
## 2 1 2 1
## 3 1 3 1
## 4 1 4 1
## 5 1 5 1
## 6 1 6 1
str(dta1)
## 'data.frame': 1350 obs. of 3 variables:
## $ ID : int 1 1 1 1 1 1 1 1 1 2 ...
## $ Item: int 1 2 3 4 5 6 7 8 9 1 ...
## $ Resp: int 1 1 1 1 1 1 1 1 0 1 ...
dta1_w<-spread(dta1, key="Item", value = "Resp")
names(dta1_w)<-c("ID", paste0("Item", 1:9))
head(dta1_w)
## ID Item1 Item2 Item3 Item4 Item5 Item6 Item7 Item8 Item9
## 1 1 1 1 1 1 1 1 1 1 0
## 2 2 1 1 1 0 0 0 1 0 0
## 3 3 1 1 1 1 0 1 1 0 1
## 4 4 1 1 1 1 1 0 1 0 0
## 5 5 1 1 1 0 0 1 1 0 0
## 6 6 0 1 1 1 0 0 0 0 0
plot(descript(dta1_w),
ylim=c(0,1),
type='b',
main="Item response curves")
grid()
dtaL <- dta1 %>% mutate(Item=factor(Item)) #Factor Items
sjPlot::tab_model(m0 <- glmer(Resp ~ -1 + Item + (1 | ID), data=dtaL,
family=binomial),
show.obs=F, show.ngroups=F,transform=NULL, show.se=T, show.r2=F,show.icc=F)
| Resp | ||||
|---|---|---|---|---|
| Predictors | Log-Odds | std. Error | CI | p |
| Item [1] | 2.90 | 0.34 | 2.24 – 3.56 | <0.001 |
| Item [2] | 1.70 | 0.24 | 1.22 – 2.17 | <0.001 |
| Item [3] | 1.35 | 0.23 | 0.90 – 1.80 | <0.001 |
| Item [4] | 1.24 | 0.22 | 0.80 – 1.69 | <0.001 |
| Item [5] | 1.02 | 0.22 | 0.60 – 1.45 | <0.001 |
| Item [6] | 0.94 | 0.22 | 0.52 – 1.36 | <0.001 |
| Item [7] | 1.86 | 0.25 | 1.37 – 2.36 | <0.001 |
| Item [8] | 0.21 | 0.20 | -0.19 – 0.61 | 0.312 |
| Item [9] | -0.86 | 0.21 | -1.28 – -0.44 | <0.001 |
| Random Effects | ||||
| σ2 | 3.29 | |||
| τ00 ID | 1.17 | |||
#use wide form data
coef(rm0 <- ltm::rasch(dta1_w, constraint=cbind(ncol(dta1_w)+1, 1)))[,1]
## Item1 Item2 Item3 Item4 Item5 Item6 Item7
## -2.8480443 -1.6661044 -1.3091179 -1.2271704 -0.9939141 -0.9196116 -1.8160398
## Item8 Item9
## -0.1993647 0.8417333
plot(rm0)
grid()
rm0 <- RM(dta1_w)
eRm::plotICC(rm0, item.subset=1:4, ask=F, empICC=list("raw"), empCI=list(lty="solid"))
plotPImap(rm0)
Teachers from a sample of 16 schools in California and Michigan were asked:
“If you could go back to college and start all over again, would you again choose teaching as a profession?”
The teachers’ perception of task variety was measured by the extent to which teachers followed the same routine each day.
Source: Raudenbush, S.W., Rowan, B., & Cheong, Y. (1993). Teaching as a non-routine task: Implications for the organizational design of schools. Educational Administrative Quarterly, 29(4), 479-500. - Column 1: Response categories: Yes, Not sure, No - Column 2: Task routine - Column 3: School ID
pacman::p_load(tidyverse, HH, ordinal)
dta2<-read.table("teach_again.txt", h=T)
head(dta2)
## Answer Task School
## 1 Yes -0.2642783 S01
## 2 Yes 0.5709041 S01
## 3 Yes 0.1329710 S01
## 4 Unsure -0.2642783 S01
## 5 No -1.0994610 S01
## 6 Yes 0.5302202 S01
str(dta2)
## 'data.frame': 650 obs. of 3 variables:
## $ Answer: chr "Yes" "Yes" "Yes" "Unsure" ...
## $ Task : num -0.264 0.571 0.133 -0.264 -1.099 ...
## $ School: chr "S01" "S01" "S01" "S01" ...
#count table
with(dta2, ftable(School, Answer))
## Answer No Unsure Yes
## School
## S01 5 4 15
## S02 9 9 20
## S03 14 10 17
## S04 0 1 8
## S05 1 5 23
## S06 9 12 18
## S07 12 4 26
## S08 7 5 18
## S09 2 8 18
## S10 21 27 69
## S11 14 12 32
## S12 15 19 18
## S13 13 6 18
## S14 0 1 7
## S15 17 11 25
## S16 13 10 22
#proportion table by School
prop.table(with(dta2, ftable(School, Answer)),1)
## Answer No Unsure Yes
## School
## S01 0.20833333 0.16666667 0.62500000
## S02 0.23684211 0.23684211 0.52631579
## S03 0.34146341 0.24390244 0.41463415
## S04 0.00000000 0.11111111 0.88888889
## S05 0.03448276 0.17241379 0.79310345
## S06 0.23076923 0.30769231 0.46153846
## S07 0.28571429 0.09523810 0.61904762
## S08 0.23333333 0.16666667 0.60000000
## S09 0.07142857 0.28571429 0.64285714
## S10 0.17948718 0.23076923 0.58974359
## S11 0.24137931 0.20689655 0.55172414
## S12 0.28846154 0.36538462 0.34615385
## S13 0.35135135 0.16216216 0.48648649
## S14 0.00000000 0.12500000 0.87500000
## S15 0.32075472 0.20754717 0.47169811
## S16 0.28888889 0.22222222 0.48888889
#mean by School
aggregate(Task ~ School, mean, data = dta2)
## School Task
## 1 S01 0.02089652
## 2 S02 -0.11779710
## 3 S03 -0.17287423
## 4 S04 0.08085690
## 5 S05 -0.14908077
## 6 S06 0.23740790
## 7 S07 -0.13334256
## 8 S08 0.18657692
## 9 S09 0.42902949
## 10 S10 0.01057772
## 11 S11 -0.15770350
## 12 S12 -0.08074803
## 13 S13 0.13885760
## 14 S14 0.89935413
## 15 S15 -0.12168721
## 16 S16 -0.01327148
m <- as.numeric(with(dta2, table(School, Answer)))
m <- as.data.frame(matrix(m, 16, 3))
names(m) <- c("No", "Unsure", "Yes")
rownames(m) <- levels(dta2$School)
m$tot <- apply(m, 1, sum) #tot=sum
m <- m[order((m[,1]+m[,2])/m[,4]), ]
likert(m[, -4], as.percent = T, main="", ylab="")
dtap <- prop.table(with(dta2, ftable(School, Answer)), 1)
dtapl <- as.data.frame(dtap) %>%
mutate( Answer = factor(Answer))
head(dtapl)
## School Answer Freq
## 1 S01 No 0.20833333
## 2 S02 No 0.23684211
## 3 S03 No 0.34146341
## 4 S04 No 0.00000000
## 5 S05 No 0.03448276
## 6 S06 No 0.23076923
#make a cumsum data frame
dtapl$Task <- rep(aggregate(Task ~ School, mean, data = dta2)[,2],3)
dtacp <- data.frame(School = levels(dtapl$School),
as.data.frame(t(apply(dtap, 1, cumsum))))
# transform to long form
dtacpl <- gather(dtacp, Answer, Prop, 2:4) %>%
mutate( Answer = factor(Answer))
ot <- theme_set(theme_bw())
# Plot
ggplot(dtapl, aes(Answer, Freq, group = School)) +
geom_point(alpha = .5)+
geom_line(alpha = .5) +
ylim(c(0, 1)) +
labs(x = "Answer", y = "Categorical response proprtions")
#Cumulative proportions
ggplot(dtacpl, aes(Answer, Prop, group = School)) +
geom_point(alpha = .5)+
geom_line(alpha = .5) +
labs(x = "Answer", y = "Cumulative proportions")
#Categorical response proprtions
ggplot(dtapl, aes(Task, Freq, color = Answer)) +
geom_point()+
stat_smooth(method = "lm", se=F) +
scale_y_continuous(limits=c(0, 1)) +
labs(x = "Task", y = "Categorical response proprtions") +
theme(legend.position = c(.9, .5))
# ordinal package
# cumulative mixed proportional odds model
summary(m0 <- clmm(factor(Answer) ~ Task + (1 | School), data=dta2))
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: factor(Answer) ~ Task + (1 | School)
## data: dta2
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 650 -642.14 1292.27 177(393) 1.91e-04 6.4e+01
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 0.09088 0.3015
## Number of groups: School 16
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Task 0.36488 0.08792 4.15 3.32e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## No|Unsure -1.2659 0.1301 -9.731
## Unsure|Yes -0.2169 0.1176 -1.844
dta2_m0<- data.frame(dta2, phat = fitted(m0))
ggplot(dta2_m0, aes(Task, phat, color = Answer)) +
geom_point(alpha = .2, pch = 20)+
geom_point(data = dtapl, aes(Task, Freq, color = Answer)) +
stat_smooth(method = "lm", se=F, alpha = .2) +
stat_smooth(data = dtapl, aes(Task, Freq, color = Answer),
method = "lm", se=F, linetype = "dotted") +
scale_y_continuous(limits=(c(0, 1))) +
labs(x = "Task", y = "Categorical response proprtions") +
theme(legend.position = c(.1, .8))
# This is done because there are 0 responses in the frequency table
pn <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "No"), fill = T)
pu <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "Unsure"))
py <- aggregate(phat ~ School, mean, data=subset(dta2_m0, Answer == "Yes"))
# add phat = 0 to S04 and S14 in the No answer category
#fix(pn)
pn <- rbind(pn, c("S04", 0), c("S14", 0))
# put them in the right order by school
pn <- pn[order(pn$School),]
# append predicted prob to the observed p-table
dtapl$phat <- c(pn[,2], pu[,2], py[,2])
dtapl$phat<- as.numeric(dtapl$phat)
# plot observed categ. prop and fitted prob. against Task
ggplot(dtapl, aes(Task, Freq, color = Answer)) +
geom_point(alpha = .3)+
stat_smooth(method = "lm", se = F) +
geom_point(aes(Task, phat, color = Answer), pch = 1)+
stat_smooth(aes(Task, phat, color = Answer),
method = "lm", se = F, lty = 2, lwd = .8) +
scale_y_continuous(limits=c(0, 1)) +
labs(x = "Task", y = "Mean observed and fitted catergorical responses (school)") +
theme(legend.position = c(.9, .5))
The dataset is taken from an intervention study on problematic drinking in college students. Alcohol-related problems, as measured by the Rutgers Alcohol Problem Index, were recorded across 2 years (5 time points) for a sample of 881 students.
Source: Atkins, D.C., Baldwin, S.A., Zheng, C., Gallop, R.J., & Neighbors, C. (2013). A tutorial on count regression and zero-altered count models for longitudinal substance use data. Psycholology of Addictive Behavior. 27(1), 166-177. - Column 1: Subject ID - Column 2: Observation ID within subject - Column 3: Gender ID - Column 4: Time in months - Column 5: Rutgers Alcohol Problem Index
pacman::p_load(tidyverse, lme4, glmmTMB, bbmle)
dta3<-read.csv("rapi.csv")
head(dta3)
## ID Obs_ID Gender Month RAPI
## 1 S1001 1 Men 0 0
## 2 S1001 2 Men 6 0
## 3 S1001 3 Men 18 0
## 4 S1002 1 Women 0 3
## 5 S1002 2 Women 6 6
## 6 S1002 3 Women 12 5
# covert to factor type and add variables for later use
dta3$YearC <- (dta3$Month - mean(unique(dta3$Month))) / 12
# numerical summaries
dim(dta3)
## [1] 3616 6
length(unique(dta3$ID))
## [1] 818
table(table(dta3$ID))
##
## 1 2 3 4 5
## 25 38 66 128 561
dta3 %>%
group_by(Gender, Month) %>%
summarize( mean(RAPI), var(RAPI), sum(RAPI < 1)/n()) %>%
as.data.frame
## Gender Month mean(RAPI) var(RAPI) sum(RAPI < 1)/n()
## 1 Men 0 7.700288 73.80587 0.11815562
## 2 Men 6 8.214744 106.43284 0.14423077
## 3 Men 12 8.039146 130.83060 0.18149466
## 4 Men 18 8.107914 165.65618 0.22661871
## 5 Men 24 7.294776 129.84162 0.25373134
## 6 Women 0 6.335456 49.15957 0.09766454
## 7 Women 6 5.300683 45.99614 0.19817768
## 8 Women 12 4.632319 41.38327 0.25058548
## 9 Women 18 4.990099 62.33737 0.27722772
## 10 Women 24 4.652956 79.29420 0.34961440
ggplot(dta3, aes(RAPI, ..density..)) +
stat_bin(binwidth = 0.9) +
facet_grid(Gender ~ as.factor(Month)) +
labs(x = "Rutgers Alcohol Problem Index")
ggplot(dta3, aes(Month, RAPI, col = Gender)) +
stat_summary(fun.data = "mean_se") +
stat_smooth(method = "glm", method.args = list(family = poisson)) +
labs(y = "Mean Rutgers Alcohol Problem Index", x = "Time (in months)") +
theme(legend.position = c(.1, .1))+
theme_bw()
# individual plots
# Randomly select about 1/3 individuals
# set random seed for reproduction
dta3$ID <-factor(dta3$ID)
set.seed(11092016)
n3 <- sample(levels(dta3$ID), round(length(levels(dta3$ID))*1/3))
ggplot(dta3[(dta3$ID %in% n3), ], aes(Month, RAPI, group = ID)) +
geom_line(alpha = .3) +
geom_point(alpha = .3) +
stat_smooth(aes(group = 1), method = "glm",
method.args = list(family = poisson))+
facet_wrap(~ Gender) +
labs(x = "Time (in months)", y = "Rutgers Alcohol Problem Index") +
theme(legend.position = "NONE")
# Model with interaction and random cluster by ID
summary(m0 <- glmer(RAPI ~ Gender*YearC + (1 | ID), data = dta3,
family = poisson))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: RAPI ~ Gender * YearC + (1 | ID)
## Data: dta3
##
## AIC BIC logLik deviance df.resid
## 24094 24125 -12042 24084 3611
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.7683 -1.0923 -0.3250 0.7538 15.4325
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 1.04 1.02
## Number of obs: 3616, groups: ID, 818
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.55567 0.05691 27.336 < 2e-16 ***
## GenderWomen -0.34717 0.07503 -4.627 3.71e-06 ***
## YearC -0.01429 0.01338 -1.068 0.285
## GenderWomen:YearC -0.12146 0.01910 -6.358 2.05e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) GndrWm YearC
## GenderWomen -0.755
## YearC 0.030 -0.023
## GndrWmn:YrC -0.021 0.035 -0.700
# add individual observation variability
summary(m1 <- lme4::glmer(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
data = dta3, family = poisson))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
## Data: dta3
##
## AIC BIC logLik deviance df.resid
## 19448.0 19497.5 -9716.0 19432.0 3608
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.03682 -0.53730 -0.03497 0.24218 1.29824
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## ID:Obs_ID (Intercept) 0.3760 0.6132
## ID (Intercept) 1.1197 1.0582
## YearC 0.3352 0.5790 0.64
## Number of obs: 3616, groups: ID:Obs_ID, 3616; ID, 818
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.32446 0.06265 21.140 < 2e-16 ***
## GenderWomen -0.39207 0.08215 -4.772 1.82e-06 ***
## YearC -0.26337 0.04620 -5.700 1.20e-08 ***
## GenderWomen:YearC -0.19136 0.06025 -3.176 0.00149 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) GndrWm YearC
## GenderWomen -0.753
## YearC 0.498 -0.371
## GndrWmn:YrC -0.373 0.497 -0.739
# add random slopes uncorrelated with intercepts
m2 <- lme4::glmer(RAPI ~ Gender*YearC + (YearC - 1 | ID) + (1 | ID) +
(1 | ID:Obs_ID),
data = dta3, family = poisson)
## compare nested models - max-likelihood
anova(m0, m1, m2)
## Data: dta3
## Models:
## m0: RAPI ~ Gender * YearC + (1 | ID)
## m2: RAPI ~ Gender * YearC + (YearC - 1 | ID) + (1 | ID) + (1 | ID:Obs_ID)
## m1: RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 5 24094 24125 -12042.0 24084
## m2 7 19562 19606 -9774.3 19548 4535.51 2 < 2.2e-16 ***
## m1 8 19448 19498 -9716.0 19432 116.54 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# residual plot
plot(m2, pch = 20, cex = .6, xlab = "Fitted values")
## Residuals over time
plot(m2, resid(.) ~ fitted(.) | as.factor(Month) + Gender, abline = c(h = 0),
type = c("p", "smooth"), pch = 20, cex = .6,
xlab = "Fitted values", ylab = "Pearson residuals")
# normality check
lattice::qqmath(m2, type = c("p", "g"))
# unit-specific rate ratios
print(exp(fixef(m2)), 3)
## (Intercept) GenderWomen YearC GenderWomen:YearC
## 3.835 0.684 0.844 0.835
#
VarCorr(m2)
## Groups Name Std.Dev.
## ID.Obs_ID (Intercept) 0.61779
## ID (Intercept) 0.99996
## ID.1 YearC 0.51389
##
# negative binomial, zero-inflated negative binomial
##
# mean linear related to variance
summary(mnb1 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
data = dta3, family = nbinom1))
## Family: nbinom1 ( log )
## Formula: RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
## Data: dta3
##
## AIC BIC logLik deviance df.resid
## 19416.0 19471.8 -9699.0 19398.0 3607
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 0.9538 0.9766
## YearC 0.2497 0.4997 0.64
## ID:Obs_ID (Intercept) 0.1922 0.4384
## Number of obs: 3616, groups: ID, 818; ID:Obs_ID, 3616
##
## Overdispersion parameter for nbinom1 family (): 1.06
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.44252 0.06147 23.468 < 2e-16 ***
## GenderWomen -0.37184 0.07719 -4.817 1.45e-06 ***
## YearC -0.24691 0.04335 -5.695 1.23e-08 ***
## GenderWomen:YearC -0.17239 0.05613 -3.071 0.00213 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# variance to mean with a quadratic trend
mnb2 <- update(mnb1, . ~ ., family = nbinom2)
summary(mnb2)
## Family: nbinom2 ( log )
## Formula:
## RAPI ~ Gender + YearC + (YearC | ID) + (1 | ID:Obs_ID) + Gender:YearC
## Data: dta3
##
## AIC BIC logLik deviance df.resid
## 19391.5 19447.3 -9686.8 19373.5 3607
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 1.105e+00 1.0512343
## YearC 3.022e-01 0.5497099 0.67
## ID:Obs_ID (Intercept) 5.242e-09 0.0000724
## Number of obs: 3616, groups: ID, 818; ID:Obs_ID, 3616
##
## Overdispersion parameter for nbinom2 family (): 2.52
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.51826 0.06294 24.123 < 2e-16 ***
## GenderWomen -0.39704 0.08224 -4.828 1.38e-06 ***
## YearC -0.25902 0.04638 -5.585 2.34e-08 ***
## GenderWomen:YearC -0.19783 0.06015 -3.289 0.00101 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Include zero-inflation term
summary(mznb2 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
zi = ~ Gender + YearC,
data = dta3, family = nbinom2))
## Family: nbinom2 ( log )
## Formula: RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
## Zero inflation: ~Gender + YearC
## Data: dta3
##
## AIC BIC logLik deviance df.resid
## 19297.2 19371.5 -9636.6 19273.2 3604
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 1.094e+00 1.046e+00
## YearC 2.976e-01 5.456e-01 0.64
## ID:Obs_ID (Intercept) 9.483e-09 9.738e-05
## Number of obs: 3616, groups: ID, 818; ID:Obs_ID, 3616
##
## Overdispersion parameter for nbinom2 family (): 3.87
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.55715 0.06323 24.625 < 2e-16 ***
## GenderWomen -0.38236 0.08250 -4.635 3.57e-06 ***
## YearC -0.22259 0.04658 -4.779 1.76e-06 ***
## GenderWomen:YearC -0.19707 0.05869 -3.358 0.000786 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Zero-inflation model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.0617 0.2430 -12.598 <2e-16 ***
## GenderWomen 0.2126 0.2901 0.733 0.4638
## YearC 0.4406 0.1936 2.275 0.0229 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# add dispersion over time
summary(mznb2a <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID), data = dta3,
zi = ~ YearC,
dispformula = ~ YearC,
family = nbinom2))
## Family: nbinom2 ( log )
## Formula: RAPI ~ Gender * YearC + (YearC | ID)
## Zero inflation: ~YearC
## Dispersion: ~YearC
## Data: dta3
##
## AIC BIC logLik deviance df.resid
## 19291.0 19359.1 -9634.5 19269.0 3605
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 1.0920 1.0450
## YearC 0.2881 0.5367 0.66
## Number of obs: 3616, groups: ID, 818
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.56590 0.06304 24.842 < 2e-16 ***
## GenderWomen -0.39309 0.08133 -4.833 1.34e-06 ***
## YearC -0.22937 0.04761 -4.817 1.45e-06 ***
## GenderWomen:YearC -0.19880 0.05828 -3.411 0.000647 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Zero-inflation model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9281 0.1601 -18.289 < 2e-16 ***
## YearC 0.5540 0.2024 2.738 0.00619 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.39174 0.07603 18.304 <2e-16 ***
## YearC 0.24835 0.11268 2.204 0.0275 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# model comparisions - across different models
# need package bbmle
AICtab(mnb1, mnb2, mznb2, mznb2a, base = T, logLik = T)
## logLik AIC dLogLik dAIC df
## mznb2a -9634.5 19291.0 64.5 0.0 11
## mznb2 -9636.6 19297.2 62.4 6.2 12
## mnb2 -9686.8 19391.5 12.2 100.6 9
## mnb1 -9699.0 19416.0 0.0 125.0 9
## diagnostics
# fortify data with residuals and fitted values
dta3$rs <- resid(mznb2a)
dta3$yhat <- predict(mznb2a)
## place observed and fitted CIs side-by-side
ggplot(dta3, aes(Month, RAPI, col = Gender)) +
stat_summary(fun.data = "mean_cl_boot") +
stat_summary(aes(y = yhat), fun.data = "mean_cl_boot", pch = 20,
linetype = "dashed", position = position_dodge(width = .9)) +
labs(y = "Mean Rutgers Alcohol Problem Index", x = "Time (in months)") +
theme(legend.position = c(.1, .1))
# residual plot
ggplot(dta3, aes(x = yhat, y = scale(rs))) +
geom_point(col = "steelblue", alpha = I(0.2)) +
geom_hline(yintercept = 0, linetype = 2) +
facet_grid(Gender ~ Month) +
labs(x = "Fitted values", y = "Standardized residuals")