library
library(pacman)
library(glmmADMB)
p_load(gee, tidyverse, geepack,aspect, MASS,repolr,
nlme,lme4,ltm,ordinal)
Question 1
dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/starter.txt",header = T)
str(dta1)
## 'data.frame': 1350 obs. of 3 variables:
## $ sid : int 1 1 1 1 1 1 1 1 1 2 ...
## $ item: int 1 2 3 4 5 6 7 8 9 1 ...
## $ resp: int 1 1 1 1 1 1 1 1 0 1 ...
dta1S <- spread(dta1,key = "item",value = "resp")
dta1S <- dta1S[,2:10]
colnames(dta1S) <- c("item_1","item_2","item_3","item_4",
"item_5","item_6","item_7","item_8","item_9")
#
dta1S <- dta1S[, order(colMeans(dta1S))]
dta1S$ts <- apply(dta1S, 1, sum)
pm <- data.frame(matrix(NA, 10, 10))
for(i in 0:9){
pm[(i+1),] <- sapply(subset(dta1S, ts == i), mean)
}
pm[1,1:10] = 0
pm[2,1:9] = 0
pm[2,10] = 1
names(pm) <- names(dta1S)
#
pmL <- pm %>%
gather( Item, PC, 1:9) %>%
mutate( Item = factor(Item))
#
aggregate(PC ~ Item, data = pmL, mean)
## Item PC
## 1 item_1 0.7079506
## 2 item_2 0.5442158
## 3 item_3 0.5196779
## 4 item_4 0.5090154
## 5 item_5 0.4535898
## 6 item_6 0.4854733
## 7 item_7 0.5802277
## 8 item_8 0.3492669
## 9 item_9 0.2505826
#
pmL$Item <- as.factor(pmL$Item)
levels(pmL$Item) <- c("item_1","item_7","item_2","item_3",
"item_4","item_6","item_5","item_8","item_9")
# observed proportion of correct responses by total score
ggplot(pmL, aes(ts, PC, col = Item)) +
geom_point() +
geom_line() +
labs(x = "Total score", y = "Proportion correct") +
theme(legend.position = c(.1, .8))+
theme_bw()

#
dta1S$ID <- factor(paste0("P", 1000+(1:dim(dta1S)[1])))
dtaL <- dta1S %>% gather( Item, Answer, 1:9)
dtaL <- dtaL[order(dtaL$ID), ]
row.names(dtaL) <- NULL
#
prop.table(with(dtaL, ftable(Item, Answer)), 1)
## Answer 0 1
## Item
## item_1 0.0800000 0.9200000
## item_2 0.2000000 0.8000000
## item_3 0.2533333 0.7466667
## item_4 0.2666667 0.7333333
## item_5 0.3066667 0.6933333
## item_6 0.3200000 0.6800000
## item_7 0.1800000 0.8200000
## item_8 0.4600000 0.5400000
## item_9 0.6666667 0.3333333
#
summary(m0 <- glmer(Answer ~ -1 + Item + (1 | ID), data = dtaL,
family = binomial))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: Answer ~ -1 + Item + (1 | ID)
## Data: dtaL
##
## AIC BIC logLik deviance df.resid
## 1430.0 1482.1 -705.0 1410.0 1340
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.0215 -0.5795 0.3395 0.5451 3.6578
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 1.165 1.079
## Number of obs: 1350, groups: ID, 150
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## Itemitem_1 2.903013 0.001078 2691.9 < 2e-16 ***
## Itemitem_2 1.703233 0.001078 1579.3 < 2e-16 ***
## Itemitem_3 1.337304 0.001054 1268.8 < 2e-16 ***
## Itemitem_4 1.210188 0.212338 5.7 1.20e-08 ***
## Itemitem_5 1.001692 0.206533 4.9 1.23e-06 ***
## Itemitem_6 0.924094 0.204687 4.5 6.34e-06 ***
## Itemitem_7 1.864884 0.001046 1783.2 < 2e-16 ***
## Itemitem_8 0.203071 0.001054 192.7 < 2e-16 ***
## Itemitem_9 -0.877798 0.205296 -4.3 1.90e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## Itmt_1 Itmt_2 Itmt_3 Itmt_4 Itmt_5 Itmt_6 Itmt_7 Itmt_8
## Itemitem_2 -0.104
## Itemitem_3 -0.183 -0.183
## Itemitem_4 0.000 0.000 0.000
## Itemitem_5 0.002 0.002 0.003 0.101
## Itemitem_6 0.000 0.001 0.001 0.102 0.105
## Itemitem_7 -0.231 -0.231 0.095 -0.001 0.004 0.001
## Itemitem_8 0.183 0.183 0.156 0.002 -0.002 0.001 -0.095
## Itemitem_9 -0.001 -0.001 -0.001 0.093 0.098 0.099 -0.002 0.003
## convergence code: 0
## Model failed to converge with max|grad| = 0.283009 (tol = 0.001, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
#
fit <- rasch(dta1S[,1:9])
plot(fit, legend = TRUE, cx = "right", lwd = 1, cex = 1.4,
cex.lab = 1.6, cex.main = 2, col = 1, lty = c(1, 1, 1, 2, 2),
pch = c(16, 15, 17, 0, 1))
abline(v = 0, h = 0.5, col = "black", lty = "dotted")

plot(fit, type = "IIC", items = 1:9, legend = TRUE, lwd = 2, cx = "topright")

Question 2
dta2 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/adolescentSmoking.txt",header = T)
str(dta2)
## 'data.frame': 8730 obs. of 5 variables:
## $ newid : int 1 1 1 1 1 2 2 2 2 2 ...
## $ sex : int 1 1 1 1 1 0 0 0 0 0 ...
## $ parsmk: int 0 0 0 0 0 0 0 0 0 0 ...
## $ wave : int 1 2 4 5 6 1 2 3 4 5 ...
## $ smkreg: int 0 0 0 0 0 0 0 0 0 0 ...
dta2 <- dta2%>% mutate(newid = as.factor(newid),
sex = as.factor(sex),
parsmk = as.factor(parsmk),
smkreg = as.factor(smkreg),
wave = as.factor(wave))
with(dta2, ftable(sex, smkreg))
## smkreg 0 1
## sex
## 0 3568 433
## 1 4075 654
t1 <- with(dta2, ftable(sex,wave, smkreg))
prop.dta2 <- as.data.frame(matrix(0,24,5))
for(i in 1:24){
prop.dta2[i,1] = ifelse((i-1) %/% 12 == 0,0,1)
prop.dta2[i,2] = ifelse(i%%6 == 0,6,i%%6)
prop.dta2[i,3] = ifelse((i-1) %/% 6 == 0,0,
ifelse((i-1) %/% 6 == 1,1,
ifelse((i-1) %/% 6 == 2,0,1)))
prop.dta2[i,4] = t1[i]
prop.dta2[i,5] = ifelse(i <= 12 ,t1[i]+t1[i+12],t1[i-12]+t1[i])
}
colnames(prop.dta2) <- c("smkreg","wave","sex","count","sum")
prop.dta2 <- prop.dta2%>% mutate(smkreg = as.factor(smkreg),
wave = as.factor(wave),
sex = as.factor(sex),
prop = count/sum)
levels(prop.dta2$sex) <- c("Male","Female")
#
ggplot(data = prop.dta2[13:24,], aes(x = wave,y = prop, group = sex, linetype = sex))+
geom_point()+
geom_line()+
labs(x = "Wave of study",y = "Propotion of smokers")+
coord_cartesian(ylim=c(0,0.2))+
theme_bw()

#
dta2$wave <- relevel(dta2$wave, ref = "6")
dta2$sex <- relevel(dta2$sex, ref = "1")
dta2$parsmk <- relevel(dta2$parsmk, ref = "1")
# model fit
summary(r2 <- glmer(smkreg ~ wave + sex + parsmk + (1 | newid), data = dta2,
family = "binomial"))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: smkreg ~ wave + sex + parsmk + (1 | newid)
## Data: dta2
##
## AIC BIC logLik deviance df.resid
## 3738.7 3802.4 -1860.4 3720.7 8721
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.9822 -0.0221 -0.0109 -0.0055 6.9059
##
## Random effects:
## Groups Name Variance Std.Dev.
## newid (Intercept) 64.85 8.053
## Number of obs: 8730, groups: newid, 1760
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.3331 0.3413 -15.625 < 2e-16 ***
## wave1 -3.5093 0.3082 -11.385 < 2e-16 ***
## wave2 -2.4826 0.2195 -11.309 < 2e-16 ***
## wave3 -2.1297 0.2120 -10.044 < 2e-16 ***
## wave4 -1.5019 0.2003 -7.497 6.55e-14 ***
## wave5 -1.0621 0.1936 -5.486 4.10e-08 ***
## sex0 -1.0687 0.3373 -3.169 0.00153 **
## parsmk0 -1.8403 0.3258 -5.649 1.62e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) wave1 wave2 wave3 wave4 wave5 sex0
## wave1 0.001
## wave2 -0.046 0.409
## wave3 -0.063 0.399 0.510
## wave4 -0.096 0.379 0.502 0.504
## wave5 -0.129 0.356 0.482 0.489 0.500
## sex0 -0.381 0.031 0.033 0.028 0.022 0.017
## parsmk0 -0.568 0.024 0.022 0.021 0.018 0.015 0.176
## convergence code: 0
## Model failed to converge with max|grad| = 5.45356 (tol = 0.001, component 1)
Question 3
dta3 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/bitterness.txt",header = T)
str(dta3)
## 'data.frame': 72 obs. of 6 variables:
## $ obs : int 1 2 3 4 5 6 7 8 9 10 ...
## $ jdg : int 1 1 1 1 1 1 1 1 2 2 ...
## $ btl : int 1 0 1 0 1 0 1 0 1 0 ...
## $ cntct : int 0 0 1 1 0 0 1 1 0 0 ...
## $ tmpr : int 1 1 1 1 0 0 0 0 1 1 ...
## $ rating: int 2 3 3 4 4 4 5 5 1 2 ...
dta3 <- dta3%>% mutate(jdg = as.factor(jdg),
btl = as.factor(btl),
cntct = as.factor(cntct),
tmpr = as.factor(tmpr))
#
t2 <- with(dta3,ftable(cntct,tmpr,rating))
mean_rating <- as.data.frame(matrix(0,4,3))
for (i in 1:4){
mean_rating[i,1] = ifelse(i<= 2 ,0,1)
mean_rating[i,2] = ifelse(i%% 2,1,0)
mean_rating[i,3] = (1*t2[i]+2*t2[i+4]+3*t2[i+8]+4*t2[i+12]+
5*t2[i+16])/(t2[i]+t2[i+4]+t2[i+8]+t2[i+12]+t2[i+16])
}
colnames(mean_rating) <- c("Contact","Temperature","Rating")
mean_rating <- mean_rating%>%mutate(Contact = as.factor(Contact),
Temperature = as.factor(Temperature))
levels(mean_rating$Contact) <- c("No","Yes")
levels(mean_rating$Temperature) <- c("Warm","Cold")
mean_rating$Temperature <- relevel(mean_rating$Temperature,ref="Cold")
#
ggplot(data = mean_rating,aes(x = Temperature, y = Rating,group = Contact,linetype = Contact))+
geom_point()+
geom_line()+
theme_bw()

#
dta3$btl <- relevel(dta3$btl, ref = "1")
dta3$tmpr <- relevel(dta3$tmpr, ref = "1")
dta3$rating <- as.factor(dta3$rating)
#
summary(r3 <- clmm(rating~btl + cntct + tmpr + (1 | jdg),data = dta3))
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: rating ~ btl + cntct + tmpr + (1 | jdg)
## data: dta3
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 72 -81.43 178.85 400(1203) 1.67e-06 3.1e+01
##
## Random effects:
## Groups Name Variance Std.Dev.
## jdg (Intercept) 1.304 1.142
## Number of groups: jdg 9
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## btl0 0.2441 0.4641 0.526 0.598908
## cntct1 1.8344 0.5125 3.579 0.000345 ***
## tmpr0 3.0727 0.5967 5.149 2.61e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 1|2 -1.5084 0.7180 -2.101
## 2|3 1.6448 0.6573 2.503
## 3|4 4.3729 0.8609 5.079
## 4|5 6.2341 1.0172 6.128
Question 4
dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/coupleCommitment.txt",header = T)
str(dta4)
## 'data.frame': 263 obs. of 10 variables:
## $ couple : int 1 1 2 2 3 3 4 4 5 5 ...
## $ gender : int 0 1 0 1 0 1 0 1 0 1 ...
## $ msi : int 4 3 7 4 4 0 8 2 2 3 ...
## $ das : int 70 92 45 49 79 90 92 70 87 75 ...
## $ sex : int 61 64 69 68 49 56 66 68 42 56 ...
## $ afc : int 65 69 69 69 49 57 56 67 65 59 ...
## $ dasc : num -14.49 7.51 -39.49 -35.49 -5.49 ...
## $ sexc : num 0.932 3.932 8.932 7.932 -11.068 ...
## $ afcc : num 1.65 5.65 5.65 5.65 -14.35 ...
## $ infidel: int 0 0 0 0 0 0 0 0 0 0 ...
dta4 <- dta4%>%mutate(couple = as.factor(couple),
gender = as.factor(gender),
infidel = as.factor(infidel))
levels(dta4$gender) <- c("Wife","Husband")
levels(dta4$infidel) <- c("No","Yes")
#
ggplot(data = dta4,aes(x = msi))+
geom_bar()+
facet_grid(infidel~gender)+
labs(x = "Marital Status Inventory ",y = "Count")+
theme_bw()

#
ggplot(data = dta4,aes(x = dasc , y = msi,group = gender,color = gender))+
geom_point()+
stat_smooth(method = "lm",se = FALSE)+
facet_grid(~infidel)+
labs(x = "Dyadic Adjustment Scale (center)", y = "Marital Status Inventory")+
theme_bw()

#
ggplot(data = dta4,aes(x = afcc , y = msi,group = gender,color = gender))+
geom_point()+
stat_smooth(method = "lm")+
facet_grid(~infidel)+
labs(x = "Affective communication (center)", y = "Marital Status Inventory")+
theme_bw()

#
ggplot(data = dta4,aes(x = sexc , y = msi,group = gender,color = gender))+
geom_point()+
stat_smooth(method = "lm")+
facet_grid(~infidel)+
labs(x = "Sexual dyssatisfaction (center)", y = "Marital Status Inventory")+
theme_bw()

#gee
summary(r4_1 <- gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel,
id = couple, data = dta4, family = poisson, corstr = "exchangeable"))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept) dasc infidelYes genderHusband
## 1.127447584 -0.017746288 0.525803868 -0.211613976
## afcc sexc dasc:infidelYes
## 0.020641206 0.006026088 0.020016627
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Logarithm
## Variance to Mean Relation: Poisson
## Correlation Structure: Exchangeable
##
## Call:
## gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel,
## id = couple, data = dta4, family = poisson, corstr = "exchangeable")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -6.0738805 -1.7362610 -0.2607464 1.5689169 6.0691957
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E. Robust z
## (Intercept) 1.122761266 0.073815193 15.210436 0.072177241 15.555614
## dasc -0.017848494 0.004029852 -4.429069 0.003410110 -5.233993
## infidelYes 0.505261009 0.144164750 3.504747 0.119537471 4.226800
## genderHusband -0.203380939 0.082501523 -2.465178 0.075600774 -2.690197
## afcc 0.016503411 0.008203429 2.011770 0.007672575 2.150961
## sexc 0.008513665 0.005466519 1.557420 0.005214695 1.632629
## dasc:infidelYes 0.016748315 0.007282451 2.299818 0.006527212 2.565922
##
## Estimated Scale Parameter: 1.958107
## Number of Iterations: 3
##
## Working Correlation
## [,1] [,2]
## [1,] 1.0000000 0.3356346
## [2,] 0.3356346 1.0000000
#glmm
summary(r4_2 <- glmmadmb(msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel, random = ~ 1 | couple, data = dta4,
family = "nbinom"))
##
## Call:
## glmmadmb(formula = msi ~ dasc + infidel + gender + afcc + sexc +
## dasc:infidel, data = dta4, family = "nbinom", random = ~1 |
## couple)
##
## AIC: 1154
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.98453 0.08235 11.96 < 2e-16 ***
## dasc -0.01984 0.00425 -4.66 3.1e-06 ***
## infidelYes 0.52275 0.17290 3.02 0.0025 **
## genderHusband -0.20499 0.07984 -2.57 0.0102 *
## afcc 0.01601 0.00829 1.93 0.0534 .
## sexc 0.00933 0.00546 1.71 0.0877 .
## dasc:infidelYes 0.01668 0.00810 2.06 0.0395 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of observations: total=263, couple=133
## Random effect variance(s):
## Group=couple
## Variance StdDev
## (Intercept) 0.2853 0.5342
##
## Negative binomial dispersion parameter: 22.856 (std. err.: 31.389)
##
## Log-likelihood: -567.975
dta4$p_gee <- fitted(r4_1)
dta4$p_glmm <- fitted(r4_2)
#
ggplot(data = dta4, aes(x = dasc, y = msi, col = gender))+
geom_point()+
stat_smooth(method = "lm", se = F)+
stat_smooth(aes(y = p_gee), lty = "dotted", method = "lm", se = F)+
stat_smooth(aes(y = p_glmm), lty = "dashed",method = "lm", se = F)+
facet_grid(~infidel)+
labs(x = "Dyadic Adjustment Scale (center)", y = "Marital Status Inventory")+
theme_bw()

#
ggplot(data = dta4, aes(x = afcc, y = msi, col = gender))+
geom_point()+
stat_smooth(method = "lm", se = F)+
stat_smooth(aes(y = p_gee), lty = "dotted", method = "lm", se = F)+
stat_smooth(aes(y = p_glmm), lty = "dashed",method = "lm", se = F)+
facet_grid(~infidel)+
labs(x = "Affective communication (center)", y = "Marital Status Inventory")+
theme_bw()

#
ggplot(data = dta4, aes(x = sexc, y = msi, col = gender))+
geom_point()+
stat_smooth(method = "lm", se = F)+
stat_smooth(aes(y = p_gee), lty = "dotted", method = "lm", se = F)+
stat_smooth(aes(y = p_glmm), lty = "dashed",method = "lm", se = F)+
facet_grid(~infidel)+
labs(x = "Sexual dyssatisfaction (center)", y = "Marital Status Inventory")+
theme_bw()
