1 Exercise 1

1.1 input data

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
dta <- read.table("starter.txt", header=T)

head(dta)

##   sid item resp
## 1   1    1    1
## 2   1    2    1
## 3   1    3    1
## 4   1    4    1
## 5   1    5    1
## 6   1    6    1

tail(dta)

##      sid item resp
## 1345 150    4    1
## 1346 150    5    1
## 1347 150    6    1
## 1348 150    7    1
## 1349 150    8    1
## 1350 150    9    0

1.2 long to wide

dta1 <- spread(dta, key = "item", value = "resp")

names(dta1) <- c("ID","Item 1","Item 2","Item 3","Item 4 ", "Item 5", "Item 6", 
                  "Item 7","Item 8", "Item 9")
dta1<- select(dta1,-1)
head(dta1)

##   Item 1 Item 2 Item 3 Item 4  Item 5 Item 6 Item 7 Item 8 Item 9
## 1      1      1      1       1      1      1      1      1      0
## 2      1      1      1       0      0      0      1      0      0
## 3      1      1      1       1      0      1      1      0      1
## 4      1      1      1       1      1      0      1      0      0
## 5      1      1      1       0      0      1      1      0      0
## 6      0      1      1       1      0      0      0      0      0

1.3 Correlated binary responses

#lecture note p.5
knitr::kable(cor(dta1))

	Item 1	Item 2	Item 3	Item 4	Item 5	Item 6	Item 7	Item 8	Item 9
Item 1	1.0000000	0.0982946	0.0542415	0.0444554	0.1769309	0.0611080	0.1176913	0.0236666	0.0521286
Item 2	0.0982946	1.0000000	0.2069345	0.2261335	0.3180732	0.1572070	0.1561738	0.2742122	0.0353553
Item 3	0.0542415	0.2069345	1.0000000	0.2380182	0.2442222	0.2247635	0.1659775	0.1390151	0.0541944
Item 4	0.0444554	0.2261335	0.2380182	1.0000000	0.1874462	0.1357355	0.1883526	0.2601335	0.2025407
Item 5	0.1769309	0.3180732	0.2442222	0.1874462	1.0000000	0.1326535	0.1399923	0.3144546	0.0715628
Item 6	0.0611080	0.1572070	0.2247635	0.1357355	0.1326535	1.0000000	0.1993889	0.0550560	0.1515848
Item 7	0.1176913	0.1561738	0.1659775	0.1883526	0.1399923	0.1993889	1.0000000	0.0898275	0.1840525
Item 8	0.0236666	0.2742122	0.1390151	0.2601335	0.3144546	0.0550560	0.0898275	1.0000000	0.1702513
Item 9	0.0521286	0.0353553	0.0541944	0.2025407	0.0715628	0.1515848	0.1840525	0.1702513	1.0000000

1.4 Item response probabilties

#lecture note p.4
library(ltm)

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: msm

## Loading required package: polycor

knitr::kable(descript(dta1)$perc[c(1, 7, 2, 3, 4, 5, 6, 8, 9),])

	0	1	logit
Item 1	0.0800000	0.9200000	2.4423470
Item 7	0.1800000	0.8200000	1.5163475
Item 2	0.2000000	0.8000000	1.3862944
Item 3	0.2533333	0.7466667	1.0809127
Item 4	0.2666667	0.7333333	1.0116009
Item 5	0.3066667	0.6933333	0.8157495
Item 6	0.3200000	0.6800000	0.7537718
Item 8	0.4600000	0.5400000	0.1603427
Item 9	0.6666667	0.3333333	-0.6931472

第一題答對的機率較高

1.5 Item response curves

#lecture note p.3
plot(descript(dta1), 
     ylim=c(0,1), 
     type='b',
     main="Item response curves")
grid()

1.6 Fit the Rasch model with rasch

#lecture note p.8
coef(rm0 <- ltm::rasch(dta1, constraint=cbind(ncol(dta1)+1, 1)))[,1]

##     Item 1     Item 2     Item 3    Item 4      Item 5     Item 6     Item 7 
## -2.8480443 -1.6661044 -1.3091179 -1.2271704 -0.9939141 -0.9196116 -1.8160398 
##     Item 8     Item 9 
## -0.1993647  0.8417333

plot(rm0)

1.7 Item characteristic curves

#lecture note p.9
library(eRm)
rm0 <- RM(dta1)

plotICC(rm0, item.subset=1:9, ask=F, empICC=list("raw"), empCI=list(lty="solid"))

1.8 The item-person map

plotPImap(rm0)

各個item間的距離無等距,無法區分難易度

1.9 The Rasch model

library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

dta$item <- factor(dta$item)
sjPlot::tab_model(m0 <- glmer(resp ~ -1 + item + (1 | sid), data=dta, family=binomial), show.obs=F, show.ngroups=F, transform=NULL, show.se=T, show.r2=F,show.icc=F)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.28301 (tol = 0.002, component 1)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?

	resp
Predictors	Log-Odds	std. Error	CI	p
item [1]	2.90	0.00	2.90 – 2.91	<0.001
item [2]	1.70	0.00	1.70 – 1.71	<0.001
item [3]	1.34	0.00	1.34 – 1.34	<0.001
item [4]	1.21	0.21	0.79 – 1.63	<0.001
item [5]	1.00	0.21	0.60 – 1.41	<0.001
item [6]	0.92	0.20	0.52 – 1.33	<0.001
item [7]	1.86	0.00	1.86 – 1.87	<0.001
item [8]	0.20	0.00	0.20 – 0.21	<0.001
item [9]	-0.88	0.21	-1.28 – -0.48	<0.001
Random Effects
σ²	3.29
τ₀₀ _sid	1.16

2 Exercise 2

2.1 load and see

# load packages
pacman::p_load(tidyverse, HH, ordinal)

dta2 <- read.table("teach_again.txt", h=T)

head(dta2)

##   Answer       Task School
## 1    Yes -0.2642783    S01
## 2    Yes  0.5709041    S01
## 3    Yes  0.1329710    S01
## 4 Unsure -0.2642783    S01
## 5     No -1.0994610    S01
## 6    Yes  0.5302202    S01

str(dta2)

## 'data.frame':    650 obs. of  3 variables:
##  $ Answer: chr  "Yes" "Yes" "Yes" "Unsure" ...
##  $ Task  : num  -0.264 0.571 0.133 -0.264 -1.099 ...
##  $ School: chr  "S01" "S01" "S01" "S01" ...

2.2 Numerical summary

with(dta2, ftable(School, Answer))

##        Answer No Unsure Yes
## School                     
## S01            5      4  15
## S02            9      9  20
## S03           14     10  17
## S04            0      1   8
## S05            1      5  23
## S06            9     12  18
## S07           12      4  26
## S08            7      5  18
## S09            2      8  18
## S10           21     27  69
## S11           14     12  32
## S12           15     19  18
## S13           13      6  18
## S14            0      1   7
## S15           17     11  25
## S16           13     10  22

2.3 Probability summary

# Probability summary
prop.table(with(dta2, ftable(School, Answer)),1)

##        Answer         No     Unsure        Yes
## School                                        
## S01           0.20833333 0.16666667 0.62500000
## S02           0.23684211 0.23684211 0.52631579
## S03           0.34146341 0.24390244 0.41463415
## S04           0.00000000 0.11111111 0.88888889
## S05           0.03448276 0.17241379 0.79310345
## S06           0.23076923 0.30769231 0.46153846
## S07           0.28571429 0.09523810 0.61904762
## S08           0.23333333 0.16666667 0.60000000
## S09           0.07142857 0.28571429 0.64285714
## S10           0.17948718 0.23076923 0.58974359
## S11           0.24137931 0.20689655 0.55172414
## S12           0.28846154 0.36538462 0.34615385
## S13           0.35135135 0.16216216 0.48648649
## S14           0.00000000 0.12500000 0.87500000
## S15           0.32075472 0.20754717 0.47169811
## S16           0.28888889 0.22222222 0.48888889

# mean
aggregate(Task ~ School, mean, data = dta2)

##    School        Task
## 1     S01  0.02089652
## 2     S02 -0.11779710
## 3     S03 -0.17287423
## 4     S04  0.08085690
## 5     S05 -0.14908077
## 6     S06  0.23740790
## 7     S07 -0.13334256
## 8     S08  0.18657692
## 9     S09  0.42902949
## 10    S10  0.01057772
## 11    S11 -0.15770350
## 12    S12 -0.08074803
## 13    S13  0.13885760
## 14    S14  0.89935413
## 15    S15 -0.12168721
## 16    S16 -0.01327148

2.4 HH for likert scale plots

# save numerical summary as data frame
m <- as.numeric(with(dta2, table(School, Answer)))

# arrange
m <- as.data.frame(matrix(m, 16, 3))

# rename
names(m) <- c("No", "Unsure", "Yes")

# as level
rownames(m) <- levels(dta2$School)

# calculate sum
m$tot <- apply(m, 1, sum)

# order
m <- m[order((m[,1]+m[,2])/m[,4]), ]

# plot
likert(m[, -4], as.percent = T, main="", ylab="")

2.5 Categorical response proprtions

dtap <- prop.table(with(dta2, ftable(School, Answer)), 1)

#
dtapl <- as.data.frame(dtap) %>%
         mutate( Answer = factor(Answer))
         
#
dtapl$Task <- rep(aggregate(Task ~ School, mean, data = dta2)[,2],3)

#
dtacp <- data.frame(School = levels(dtapl$School),
                    as.data.frame(t(apply(dtap, 1, cumsum))))

# wide to long
dtacpl <- gather(dtacp, Answer, Prop, 2:4) %>%
          mutate( Answer = factor(Answer))
# theme set
ot <- theme_set(theme_bw())

# plot
ggplot(dtapl, aes(Answer, Freq, group = School)) +
 geom_point(alpha = .5)+
 geom_line(alpha = .5) +
 ylim(c(0, 1)) +
 labs(x = "Answer", y = "Categorical response proprtions")

2.6 Cumulative proportions

#
ggplot(dtacpl, aes(Answer, Prop, group = School)) +
 geom_point(alpha = .5)+
 geom_line(alpha = .5) +
 labs(x = "Answer", y = "Cumulative proportions")

#
ggplot(dtapl, aes(Task, Freq, color = Answer)) +
 geom_point()+
 stat_smooth(method = "lm", se=F) +
 scale_y_continuous(limits=c(0, 1)) +
 labs(x = "Task", y = "Categorical response proprtions") +
 theme(legend.position = c(.9, .5))

## `geom_smooth()` using formula 'y ~ x'

2.7 cumulative mixed proportional odds model

dta2 <- dta2 %>%
  mutate(Answer = factor(Answer))

summary(m0 <- clmm(Answer ~ Task + (1 | School), data=dta2))

## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: Answer ~ Task + (1 | School)
## data:    dta2
## 
##  link  threshold nobs logLik  AIC     niter    max.grad cond.H 
##  logit flexible  650  -642.14 1292.27 177(393) 1.91e-04 6.4e+01
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  School (Intercept) 0.09088  0.3015  
## Number of groups:  School 16 
## 
## Coefficients:
##      Estimate Std. Error z value Pr(>|z|)    
## Task  0.36488    0.08792    4.15 3.32e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##            Estimate Std. Error z value
## No|Unsure   -1.2659     0.1301  -9.731
## Unsure|Yes  -0.2169     0.1176  -1.844

#
dta_m0 <- data.frame(na.omit(dta2), phat = fitted(m0))
ggplot(dta_m0, aes(Task, phat, color = Answer)) +
 geom_point(alpha = .2, pch = 20)+
 geom_point(data = dtapl, aes(Task, Freq, color = Answer)) +
 stat_smooth(method = "lm", se=F, alpha = .2) +
 stat_smooth(data = dtapl, aes(Task, Freq, color = Answer),
             method = "lm", se=F, linetype = "dotted") +
 scale_y_continuous(limits=(c(0, 1))) +
 labs(x = "Task", y = "Categorical response proprtions") +
 theme(legend.position = c(.1, .8))

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# This is done because there are 0 responses in the frequency table
pn <- aggregate(phat ~ School, mean, data=subset(dta_m0, Answer == "No"), fill = T)
pu <- aggregate(phat ~ School, mean, data=subset(dta_m0, Answer == "Unsure"))
py <- aggregate(phat ~ School, mean, data=subset(dta_m0, Answer == "Yes"))

# add phat = 0 to S04 and S14 in the No answer category
pn <- rbind(pn, c("S04", 0), c("S14", 0))
fix(pn)

# put them in the right order by school
pn <- pn[order(pn$School),]

# append predicted prob to the observed p-table
dtapl$phat <- c(pn[,2], pu[,2], py[,2])
dtapl$phat<- as.numeric(dtapl$phat)

2.8 Mean observed and fitted catergorical responses

# plot observed categ. prop and fitted prob. against Task
ggplot(dtapl, aes(Task, Freq, color = Answer)) +
 geom_point(alpha = .3)+
 stat_smooth(method = "lm", se = F) +
 geom_point(aes(Task, phat, color = Answer), pch = 1)+
 stat_smooth(aes(Task, phat, color = Answer),
                 method = "lm", se = F, lty = 2, lwd = .8) +
 scale_y_continuous(limits=c(0, 1)) +
 labs(x = "Task", y = "Mean observed and fitted catergorical responses (school)") +
 theme(legend.position = c(.9, .5))

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# The end

3 Exercise 3

3.1 load and see

pacman::p_load(tidyverse, lme4, glmmTMB, bbmle)
dta3<-read.csv("rapi.csv")
head(dta3)

##      ID Obs_ID Gender Month RAPI
## 1 S1001      1    Men     0    0
## 2 S1001      2    Men     6    0
## 3 S1001      3    Men    18    0
## 4 S1002      1  Women     0    3
## 5 S1002      2  Women     6    6
## 6 S1002      3  Women    12    5

# covert to factor type and add variables for later use
dta3$YearC <- (dta3$Month - mean(unique(dta3$Month))) / 12
# numerical summaries
dim(dta3)

## [1] 3616    6

length(unique(dta3$ID))

## [1] 818

table(table(dta3$ID))

## 
##   1   2   3   4   5 
##  25  38  66 128 561

dta3 %>%
 group_by(Gender, Month) %>%
 summarize( mean(RAPI), var(RAPI), sum(RAPI < 1)/n()) %>%
 as.data.frame

## `summarise()` regrouping output by 'Gender' (override with `.groups` argument)

##    Gender Month mean(RAPI) var(RAPI) sum(RAPI < 1)/n()
## 1     Men     0   7.700288  73.80587        0.11815562
## 2     Men     6   8.214744 106.43284        0.14423077
## 3     Men    12   8.039146 130.83060        0.18149466
## 4     Men    18   8.107914 165.65618        0.22661871
## 5     Men    24   7.294776 129.84162        0.25373134
## 6   Women     0   6.335456  49.15957        0.09766454
## 7   Women     6   5.300683  45.99614        0.19817768
## 8   Women    12   4.632319  41.38327        0.25058548
## 9   Women    18   4.990099  62.33737        0.27722772
## 10  Women    24   4.652956  79.29420        0.34961440

3.2 plot

ggplot(dta3, aes(RAPI, ..density..)) +
 stat_bin(binwidth = 0.9) +
 facet_grid(Gender ~ as.factor(Month)) +
 labs(x = "Rutgers Alcohol Problem Index")

ggplot(dta3, aes(Month, RAPI, col = Gender)) +
 stat_summary(fun.data = "mean_se") +
 stat_smooth(method = "glm", method.args = list(family = poisson)) +
 labs(y = "Mean Rutgers Alcohol Problem Index", x = "Time (in months)") +
 theme(legend.position = c(.1, .1))+
 theme_bw()

## `geom_smooth()` using formula 'y ~ x'

# individual plots
# Randomly select about 1/3 individuals
# set random seed for reproduction
dta3$ID <-factor(dta3$ID)
set.seed(11092016)
n3 <- sample(levels(dta3$ID), round(length(levels(dta3$ID))*1/3))

ggplot(dta3[(dta3$ID %in% n3), ], aes(Month, RAPI, group = ID)) +
  geom_line(alpha = .3) +
  geom_point(alpha = .3) +
  stat_smooth(aes(group = 1), method = "glm",
              method.args = list(family = poisson))+
  facet_wrap(~ Gender) +
  labs(x = "Time (in months)", y = "Rutgers Alcohol Problem Index") +
  theme(legend.position = "NONE")

## `geom_smooth()` using formula 'y ~ x'

3.3 model

# Model with  interaction and random cluster by ID
summary(m0a <- glmer(RAPI ~ Gender*YearC + (1 | ID), data = dta3,
                           family = poisson))

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: RAPI ~ Gender * YearC + (1 | ID)
##    Data: dta3
## 
##      AIC      BIC   logLik deviance df.resid 
##    24094    24125   -12042    24084     3611 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.7683 -1.0923 -0.3250  0.7538 15.4325 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 1.04     1.02    
## Number of obs: 3616, groups:  ID, 818
## 
## Fixed effects:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.55567    0.05691  27.337  < 2e-16 ***
## GenderWomen       -0.34717    0.07502  -4.628 3.70e-06 ***
## YearC             -0.01429    0.01338  -1.068    0.285    
## GenderWomen:YearC -0.12146    0.01910  -6.358 2.05e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) GndrWm YearC 
## GenderWomen -0.755              
## YearC        0.030 -0.023       
## GndrWmn:YrC -0.021  0.035 -0.700

# add individual observation variability
summary(m1a <- lme4::glmer(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
                    data = dta3, family = poisson))

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
##    Data: dta3
## 
##      AIC      BIC   logLik deviance df.resid 
##  19448.0  19497.5  -9716.0  19432.0     3608 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.03681 -0.53730 -0.03497  0.24218  1.29823 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev. Corr
##  ID:Obs_ID (Intercept) 0.3760   0.6132       
##  ID        (Intercept) 1.1197   1.0581       
##            YearC       0.3352   0.5790   0.64
## Number of obs: 3616, groups:  ID:Obs_ID, 3616; ID, 818
## 
## Fixed effects:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.32445    0.06265  21.140  < 2e-16 ***
## GenderWomen       -0.39203    0.08215  -4.772 1.82e-06 ***
## YearC             -0.26335    0.04620  -5.700 1.20e-08 ***
## GenderWomen:YearC -0.19135    0.06025  -3.176  0.00149 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) GndrWm YearC 
## GenderWomen -0.753              
## YearC        0.498 -0.371       
## GndrWmn:YrC -0.373  0.497 -0.739

# add random slopes uncorrelated with intercepts
m2a <- lme4::glmer(RAPI ~ Gender*YearC + (YearC - 1 | ID) + (1 | ID) + 
               (1 | ID:Obs_ID),
               data = dta3, family = poisson)

3.4 compare nested models

anova(m0a, m1a, m2a)

## Data: dta3
## Models:
## m0a: RAPI ~ Gender * YearC + (1 | ID)
## m2a: RAPI ~ Gender * YearC + (YearC - 1 | ID) + (1 | ID) + (1 | ID:Obs_ID)
## m1a: RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
##     npar   AIC   BIC   logLik deviance   Chisq Df Pr(>Chisq)    
## m0a    5 24094 24125 -12042.0    24084                          
## m2a    7 19563 19606  -9774.3    19549 4535.51  2  < 2.2e-16 ***
## m1a    8 19448 19498  -9716.0    19432  116.54  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.5 residual plot

plot(m2a, pch = 20, cex = .6, xlab = "Fitted values")

3.6 Residuals over time

plot(m2a, resid(.) ~ fitted(.) | as.factor(Month) + Gender, abline = c(h = 0),
     type = c("p", "smooth"), pch = 20, cex = .6,
     xlab = "Fitted values", ylab = "Pearson residuals")

3.7 normality check

lattice::qqmath(m2a, type = c("p", "g"))

3.8 unit-specific rate ratios

print(exp(fixef(m2a)), 3)

##       (Intercept)       GenderWomen             YearC GenderWomen:YearC 
##             3.835             0.684             0.844             0.835

3.9 estimated variances

VarCorr(m2a)

##  Groups    Name        Std.Dev.
##  ID.Obs_ID (Intercept) 0.61780 
##  ID        (Intercept) 0.99997 
##  ID.1      YearC       0.51389

# negative binomial, zero-inflated negative binomial
##

# mean linear related to variance
summary(mnb1 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
                          data = dta3, family = nbinom1))

##  Family: nbinom1  ( log )
## Formula:          RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
## Data: dta3
## 
##      AIC      BIC   logLik deviance df.resid 
##  19416.0  19471.8  -9699.0  19398.0     3607 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance Std.Dev. Corr 
##  ID        (Intercept) 0.9538   0.9766        
##            YearC       0.2497   0.4997   0.64 
##  ID:Obs_ID (Intercept) 0.1922   0.4384        
## Number of obs: 3616, groups:  ID, 818; ID:Obs_ID, 3616
## 
## Overdispersion parameter for nbinom1 family (): 1.06 
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.44252    0.06147  23.468  < 2e-16 ***
## GenderWomen       -0.37184    0.07719  -4.817 1.45e-06 ***
## YearC             -0.24691    0.04335  -5.695 1.23e-08 ***
## GenderWomen:YearC -0.17239    0.05613  -3.071  0.00213 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# variance to mean with a quadratic trend
mnb2 <- update(mnb1, . ~ ., family = nbinom2)

summary(mnb2)

##  Family: nbinom2  ( log )
## Formula:          
## RAPI ~ Gender + YearC + (YearC | ID) + (1 | ID:Obs_ID) + Gender:YearC
## Data: dta3
## 
##      AIC      BIC   logLik deviance df.resid 
##  19391.5  19447.3  -9686.8  19373.5     3607 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance  Std.Dev.  Corr 
##  ID        (Intercept) 1.105e+00 1.0512343      
##            YearC       3.022e-01 0.5497099 0.67 
##  ID:Obs_ID (Intercept) 5.242e-09 0.0000724      
## Number of obs: 3616, groups:  ID, 818; ID:Obs_ID, 3616
## 
## Overdispersion parameter for nbinom2 family (): 2.52 
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.51826    0.06294  24.123  < 2e-16 ***
## GenderWomen       -0.39704    0.08224  -4.828 1.38e-06 ***
## YearC             -0.25902    0.04638  -5.585 2.34e-08 ***
## GenderWomen:YearC -0.19783    0.06015  -3.289  0.00101 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Include zero-inflation term
summary(mznb2 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
                          zi = ~ Gender + YearC,
                          data = dta3, family = nbinom2))

##  Family: nbinom2  ( log )
## Formula:          RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
## Zero inflation:        ~Gender + YearC
## Data: dta3
## 
##      AIC      BIC   logLik deviance df.resid 
##  19297.2  19371.5  -9636.6  19273.2     3604 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance  Std.Dev.  Corr 
##  ID        (Intercept) 1.094e+00 1.046e+00      
##            YearC       2.976e-01 5.456e-01 0.64 
##  ID:Obs_ID (Intercept) 9.484e-09 9.739e-05      
## Number of obs: 3616, groups:  ID, 818; ID:Obs_ID, 3616
## 
## Overdispersion parameter for nbinom2 family (): 3.87 
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.55715    0.06323  24.625  < 2e-16 ***
## GenderWomen       -0.38236    0.08250  -4.635 3.57e-06 ***
## YearC             -0.22259    0.04658  -4.779 1.76e-06 ***
## GenderWomen:YearC -0.19707    0.05869  -3.358 0.000786 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Zero-inflation model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -3.0617     0.2430 -12.598   <2e-16 ***
## GenderWomen   0.2126     0.2901   0.733   0.4638    
## YearC         0.4406     0.1936   2.275   0.0229 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# add dispersion over time
summary(mznb2a <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID), data = dta3,
                           zi = ~ YearC,
                           dispformula = ~ YearC,
                           family = nbinom2))

##  Family: nbinom2  ( log )
## Formula:          RAPI ~ Gender * YearC + (YearC | ID)
## Zero inflation:        ~YearC
## Dispersion:            ~YearC
## Data: dta3
## 
##      AIC      BIC   logLik deviance df.resid 
##  19291.0  19359.1  -9634.5  19269.0     3605 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev. Corr 
##  ID     (Intercept) 1.0920   1.0450        
##         YearC       0.2881   0.5367   0.66 
## Number of obs: 3616, groups:  ID, 818
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.56590    0.06304  24.842  < 2e-16 ***
## GenderWomen       -0.39309    0.08133  -4.833 1.34e-06 ***
## YearC             -0.22937    0.04761  -4.817 1.45e-06 ***
## GenderWomen:YearC -0.19880    0.05828  -3.411 0.000647 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Zero-inflation model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.9281     0.1601 -18.289  < 2e-16 ***
## YearC         0.5540     0.2024   2.738  0.00619 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.39174    0.07603  18.304   <2e-16 ***
## YearC        0.24835    0.11268   2.204   0.0275 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.10 model comparisions - across different models

AICtab(mnb1, mnb2, mznb2, mznb2a, base = T, logLik = T)

##        logLik  AIC     dLogLik dAIC    df
## mznb2a -9634.5 19291.0    64.5     0.0 11
## mznb2  -9636.6 19297.2    62.4     6.2 12
## mnb2   -9686.8 19391.5    12.2   100.6 9 
## mnb1   -9699.0 19416.0     0.0   125.0 9

3.11 diagnostics

# fortify data with residuals and fitted values
dta3$rs <- resid(mznb2a)
dta3$yhat <- predict(mznb2a)

## place observed and fitted CIs side-by-side
ggplot(dta3, aes(Month, RAPI, col = Gender)) +
 stat_summary(fun.data = "mean_cl_boot") +
 stat_summary(aes(y = yhat), fun.data = "mean_cl_boot", pch = 20, 
              linetype = "dashed", position = position_dodge(width = .9)) +
 labs(y = "Mean Rutgers Alcohol Problem Index", x = "Time (in months)") +
 theme(legend.position = c(.1, .1))

3.12 residual plot

ggplot(dta3, aes(x = yhat, y = scale(rs))) +
  geom_point(col = "steelblue", alpha = I(0.2)) +
  geom_hline(yintercept = 0, linetype = 2) +
  facet_grid(Gender ~ Month) +
  labs(x = "Fitted values", y = "Standardized residuals")

Exercise

tjlee

2020-11-30

1 Exercise 1

1.1 input data

1.2 long to wide

1.3 Correlated binary responses

1.4 Item response probabilties

1.5 Item response curves

1.6 Fit the Rasch model with rasch

1.7 Item characteristic curves

1.8 The item-person map

1.9 The Rasch model

2 Exercise 2

2.1 load and see

2.2 Numerical summary

2.3 Probability summary

2.4 HH for likert scale plots

2.5 Categorical response proprtions

2.6 Cumulative proportions

2.7 cumulative mixed proportional odds model

2.8 Mean observed and fitted catergorical responses

3 Exercise 3

3.1 load and see

3.2 plot

3.3 model

3.4 compare nested models

3.5 residual plot

3.6 Residuals over time

3.7 normality check

3.8 unit-specific rate ratios

3.9 estimated variances

3.10 model comparisions - across different models

3.11 diagnostics

3.12 residual plot