LLM_GLMM

library

library(pacman)
library(glmmADMB)
p_load(gee, tidyverse, geepack,aspect, MASS,repolr,
       nlme,lme4,ltm,ordinal)

Question 1

dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/starter.txt",header = T)
str(dta1)

## 'data.frame':    1350 obs. of  3 variables:
##  $ sid : int  1 1 1 1 1 1 1 1 1 2 ...
##  $ item: int  1 2 3 4 5 6 7 8 9 1 ...
##  $ resp: int  1 1 1 1 1 1 1 1 0 1 ...

dta1S <- spread(dta1,key = "item",value = "resp")
dta1S <- dta1S[,2:10]
colnames(dta1S) <- c("item_1","item_2","item_3","item_4",
                     "item_5","item_6","item_7","item_8","item_9")

#
dta1S <- dta1S[, order(colMeans(dta1S))]
dta1S$ts <- apply(dta1S, 1, sum)
pm <- data.frame(matrix(NA, 10, 10))

for(i in 0:9){
  pm[(i+1),] <- sapply(subset(dta1S, ts == i), mean)
}

pm[1,1:10] = 0
pm[2,1:9] = 0
pm[2,10] = 1

names(pm) <- names(dta1S)

#
pmL <- pm %>%
  gather( Item, PC, 1:9) %>%
  mutate( Item = factor(Item))

#
aggregate(PC ~ Item, data = pmL, mean)

##     Item        PC
## 1 item_1 0.7079506
## 2 item_2 0.5442158
## 3 item_3 0.5196779
## 4 item_4 0.5090154
## 5 item_5 0.4535898
## 6 item_6 0.4854733
## 7 item_7 0.5802277
## 8 item_8 0.3492669
## 9 item_9 0.2505826

#
pmL$Item <- as.factor(pmL$Item)
levels(pmL$Item) <- c("item_1","item_7","item_2","item_3",
                      "item_4","item_6","item_5","item_8","item_9")

# observed proportion of correct responses by total score
ggplot(pmL, aes(ts, PC, col = Item)) +
  geom_point() +
  geom_line() +
  labs(x = "Total score", y = "Proportion correct") +
  theme(legend.position = c(.1, .8))+
  theme_bw()

#
dta1S$ID <- factor(paste0("P", 1000+(1:dim(dta1S)[1])))

dtaL <- dta1S %>% gather( Item, Answer, 1:9)

dtaL <- dtaL[order(dtaL$ID), ]

row.names(dtaL) <- NULL

#
prop.table(with(dtaL, ftable(Item, Answer)), 1)

##        Answer         0         1
## Item                             
## item_1        0.0800000 0.9200000
## item_2        0.2000000 0.8000000
## item_3        0.2533333 0.7466667
## item_4        0.2666667 0.7333333
## item_5        0.3066667 0.6933333
## item_6        0.3200000 0.6800000
## item_7        0.1800000 0.8200000
## item_8        0.4600000 0.5400000
## item_9        0.6666667 0.3333333

#
summary(m0 <- glmer(Answer ~ -1 + Item + (1 | ID), data = dtaL, 
                    family = binomial))

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Answer ~ -1 + Item + (1 | ID)
##    Data: dtaL
## 
##      AIC      BIC   logLik deviance df.resid 
##   1430.0   1482.1   -705.0   1410.0     1340 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.0215 -0.5795  0.3395  0.5451  3.6578 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 1.165    1.079   
## Number of obs: 1350, groups:  ID, 150
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## Itemitem_1  2.903013   0.001078  2691.9  < 2e-16 ***
## Itemitem_2  1.703233   0.001078  1579.3  < 2e-16 ***
## Itemitem_3  1.337304   0.001054  1268.8  < 2e-16 ***
## Itemitem_4  1.210188   0.212338     5.7 1.20e-08 ***
## Itemitem_5  1.001692   0.206533     4.9 1.23e-06 ***
## Itemitem_6  0.924094   0.204687     4.5 6.34e-06 ***
## Itemitem_7  1.864884   0.001046  1783.2  < 2e-16 ***
## Itemitem_8  0.203071   0.001054   192.7  < 2e-16 ***
## Itemitem_9 -0.877798   0.205296    -4.3 1.90e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            Itmt_1 Itmt_2 Itmt_3 Itmt_4 Itmt_5 Itmt_6 Itmt_7 Itmt_8
## Itemitem_2 -0.104                                                 
## Itemitem_3 -0.183 -0.183                                          
## Itemitem_4  0.000  0.000  0.000                                   
## Itemitem_5  0.002  0.002  0.003  0.101                            
## Itemitem_6  0.000  0.001  0.001  0.102  0.105                     
## Itemitem_7 -0.231 -0.231  0.095 -0.001  0.004  0.001              
## Itemitem_8  0.183  0.183  0.156  0.002 -0.002  0.001 -0.095       
## Itemitem_9 -0.001 -0.001 -0.001  0.093  0.098  0.099 -0.002  0.003
## convergence code: 0
## Model failed to converge with max|grad| = 0.283009 (tol = 0.001, component 1)
## Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?

#
fit <- rasch(dta1S[,1:9])

plot(fit, legend = TRUE, cx = "right", lwd = 1, cex = 1.4,
     cex.lab = 1.6, cex.main = 2, col = 1, lty = c(1, 1, 1, 2, 2),
     pch = c(16, 15, 17, 0, 1))
abline(v = 0, h = 0.5, col = "black", lty = "dotted")

plot(fit, type = "IIC", items = 1:9, legend = TRUE, lwd = 2, cx = "topright")

Question 2

dta2 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/adolescentSmoking.txt",header = T)
str(dta2)

## 'data.frame':    8730 obs. of  5 variables:
##  $ newid : int  1 1 1 1 1 2 2 2 2 2 ...
##  $ sex   : int  1 1 1 1 1 0 0 0 0 0 ...
##  $ parsmk: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ wave  : int  1 2 4 5 6 1 2 3 4 5 ...
##  $ smkreg: int  0 0 0 0 0 0 0 0 0 0 ...

dta2 <- dta2%>% mutate(newid = as.factor(newid),
                       sex = as.factor(sex),
                       parsmk = as.factor(parsmk),
                       smkreg = as.factor(smkreg),
                       wave = as.factor(wave))
with(dta2, ftable(sex, smkreg))

##     smkreg    0    1
## sex                 
## 0          3568  433
## 1          4075  654

t1 <- with(dta2, ftable(sex,wave, smkreg))
prop.dta2 <- as.data.frame(matrix(0,24,5))
for(i in 1:24){
    prop.dta2[i,1] = ifelse((i-1) %/% 12 == 0,0,1)
    prop.dta2[i,2] = ifelse(i%%6 == 0,6,i%%6)
    prop.dta2[i,3] = ifelse((i-1) %/% 6 == 0,0,
                            ifelse((i-1) %/% 6 == 1,1,
                            ifelse((i-1) %/% 6 == 2,0,1)))
    prop.dta2[i,4] = t1[i]
    prop.dta2[i,5] = ifelse(i <= 12 ,t1[i]+t1[i+12],t1[i-12]+t1[i])
}
colnames(prop.dta2) <- c("smkreg","wave","sex","count","sum")
prop.dta2 <- prop.dta2%>% mutate(smkreg = as.factor(smkreg),
                                 wave = as.factor(wave),
                                 sex = as.factor(sex),
                                 prop = count/sum)

levels(prop.dta2$sex) <- c("Male","Female")
#
ggplot(data = prop.dta2[13:24,], aes(x = wave,y = prop, group = sex, linetype = sex))+
  geom_point()+
  geom_line()+
  labs(x = "Wave of study",y = "Propotion of smokers")+
  coord_cartesian(ylim=c(0,0.2))+
  theme_bw()

# 
dta2$wave <- relevel(dta2$wave, ref = "6")
dta2$sex <- relevel(dta2$sex, ref = "1")
dta2$parsmk <- relevel(dta2$parsmk, ref = "1")

# model fit
summary(r2 <- glmer(smkreg ~ wave + sex + parsmk + (1 | newid), data = dta2, 
                    family = "binomial"))

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: smkreg ~ wave + sex + parsmk + (1 | newid)
##    Data: dta2
## 
##      AIC      BIC   logLik deviance df.resid 
##   3738.7   3802.4  -1860.4   3720.7     8721 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9822 -0.0221 -0.0109 -0.0055  6.9059 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  newid  (Intercept) 64.85    8.053   
## Number of obs: 8730, groups:  newid, 1760
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -5.3331     0.3413 -15.625  < 2e-16 ***
## wave1        -3.5093     0.3082 -11.385  < 2e-16 ***
## wave2        -2.4826     0.2195 -11.309  < 2e-16 ***
## wave3        -2.1297     0.2120 -10.044  < 2e-16 ***
## wave4        -1.5019     0.2003  -7.497 6.55e-14 ***
## wave5        -1.0621     0.1936  -5.486 4.10e-08 ***
## sex0         -1.0687     0.3373  -3.169  0.00153 ** 
## parsmk0      -1.8403     0.3258  -5.649 1.62e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) wave1  wave2  wave3  wave4  wave5  sex0  
## wave1    0.001                                          
## wave2   -0.046  0.409                                   
## wave3   -0.063  0.399  0.510                            
## wave4   -0.096  0.379  0.502  0.504                     
## wave5   -0.129  0.356  0.482  0.489  0.500              
## sex0    -0.381  0.031  0.033  0.028  0.022  0.017       
## parsmk0 -0.568  0.024  0.022  0.021  0.018  0.015  0.176
## convergence code: 0
## Model failed to converge with max|grad| = 5.45356 (tol = 0.001, component 1)

Question 3

dta3 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/bitterness.txt",header = T)
str(dta3)

## 'data.frame':    72 obs. of  6 variables:
##  $ obs   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ jdg   : int  1 1 1 1 1 1 1 1 2 2 ...
##  $ btl   : int  1 0 1 0 1 0 1 0 1 0 ...
##  $ cntct : int  0 0 1 1 0 0 1 1 0 0 ...
##  $ tmpr  : int  1 1 1 1 0 0 0 0 1 1 ...
##  $ rating: int  2 3 3 4 4 4 5 5 1 2 ...

dta3 <- dta3%>% mutate(jdg = as.factor(jdg),
                       btl = as.factor(btl),
                       cntct = as.factor(cntct),
                       tmpr = as.factor(tmpr))
#
t2 <- with(dta3,ftable(cntct,tmpr,rating))
mean_rating <- as.data.frame(matrix(0,4,3))
for (i in 1:4){
  mean_rating[i,1] = ifelse(i<= 2 ,0,1)
  mean_rating[i,2] = ifelse(i%% 2,1,0)
  mean_rating[i,3] = (1*t2[i]+2*t2[i+4]+3*t2[i+8]+4*t2[i+12]+
                        5*t2[i+16])/(t2[i]+t2[i+4]+t2[i+8]+t2[i+12]+t2[i+16])
}
colnames(mean_rating) <- c("Contact","Temperature","Rating")
mean_rating <- mean_rating%>%mutate(Contact = as.factor(Contact),
                                    Temperature = as.factor(Temperature))
levels(mean_rating$Contact) <- c("No","Yes")
levels(mean_rating$Temperature) <- c("Warm","Cold")
mean_rating$Temperature <- relevel(mean_rating$Temperature,ref="Cold")
#
ggplot(data = mean_rating,aes(x = Temperature, y = Rating,group = Contact,linetype = Contact))+
  geom_point()+
  geom_line()+
  theme_bw()

#
dta3$btl <- relevel(dta3$btl, ref = "1")
dta3$tmpr <- relevel(dta3$tmpr, ref = "1")
dta3$rating <- as.factor(dta3$rating)
#
summary(r3 <- clmm(rating~btl + cntct + tmpr + (1 | jdg),data = dta3))

## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: rating ~ btl + cntct + tmpr + (1 | jdg)
## data:    dta3
## 
##  link  threshold nobs logLik AIC    niter     max.grad cond.H 
##  logit flexible  72   -81.43 178.85 400(1203) 1.67e-06 3.1e+01
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  jdg    (Intercept) 1.304    1.142   
## Number of groups:  jdg 9 
## 
## Coefficients:
##        Estimate Std. Error z value Pr(>|z|)    
## btl0     0.2441     0.4641   0.526 0.598908    
## cntct1   1.8344     0.5125   3.579 0.000345 ***
## tmpr0    3.0727     0.5967   5.149 2.61e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##     Estimate Std. Error z value
## 1|2  -1.5084     0.7180  -2.101
## 2|3   1.6448     0.6573   2.503
## 3|4   4.3729     0.8609   5.079
## 4|5   6.2341     1.0172   6.128

Question 4

dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/coupleCommitment.txt",header = T)
str(dta4)

## 'data.frame':    263 obs. of  10 variables:
##  $ couple : int  1 1 2 2 3 3 4 4 5 5 ...
##  $ gender : int  0 1 0 1 0 1 0 1 0 1 ...
##  $ msi    : int  4 3 7 4 4 0 8 2 2 3 ...
##  $ das    : int  70 92 45 49 79 90 92 70 87 75 ...
##  $ sex    : int  61 64 69 68 49 56 66 68 42 56 ...
##  $ afc    : int  65 69 69 69 49 57 56 67 65 59 ...
##  $ dasc   : num  -14.49 7.51 -39.49 -35.49 -5.49 ...
##  $ sexc   : num  0.932 3.932 8.932 7.932 -11.068 ...
##  $ afcc   : num  1.65 5.65 5.65 5.65 -14.35 ...
##  $ infidel: int  0 0 0 0 0 0 0 0 0 0 ...

dta4 <- dta4%>%mutate(couple = as.factor(couple),
               gender = as.factor(gender),
               infidel = as.factor(infidel))

levels(dta4$gender) <- c("Wife","Husband")
levels(dta4$infidel) <- c("No","Yes")
#
ggplot(data = dta4,aes(x = msi))+
  geom_bar()+
  facet_grid(infidel~gender)+
  labs(x = "Marital Status Inventory ",y = "Count")+
  theme_bw()

#
ggplot(data = dta4,aes(x = dasc , y = msi,group = gender,color = gender))+
  geom_point()+
  stat_smooth(method = "lm",se = FALSE)+
  facet_grid(~infidel)+
  labs(x = "Dyadic Adjustment Scale (center)", y = "Marital Status Inventory")+
  theme_bw()

#
ggplot(data = dta4,aes(x = afcc , y = msi,group = gender,color = gender))+
  geom_point()+
  stat_smooth(method = "lm")+
  facet_grid(~infidel)+
  labs(x = "Affective communication (center)", y = "Marital Status Inventory")+
  theme_bw()

#
ggplot(data = dta4,aes(x = sexc , y = msi,group = gender,color = gender))+
  geom_point()+
  stat_smooth(method = "lm")+
  facet_grid(~infidel)+
  labs(x = "Sexual dyssatisfaction (center)", y = "Marital Status Inventory")+
  theme_bw()

#gee
summary(r4_1 <- gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel, 
                  id = couple, data = dta4, family = poisson, corstr = "exchangeable"))

## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27

## running glm to get initial regression estimate

##     (Intercept)            dasc      infidelYes   genderHusband 
##     1.127447584    -0.017746288     0.525803868    -0.211613976 
##            afcc            sexc dasc:infidelYes 
##     0.020641206     0.006026088     0.020016627

## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logarithm 
##  Variance to Mean Relation: Poisson 
##  Correlation Structure:     Exchangeable 
## 
## Call:
## gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel, 
##     id = couple, data = dta4, family = poisson, corstr = "exchangeable")
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -6.0738805 -1.7362610 -0.2607464  1.5689169  6.0691957 
## 
## 
## Coefficients:
##                     Estimate  Naive S.E.   Naive z Robust S.E.  Robust z
## (Intercept)      1.122761266 0.073815193 15.210436 0.072177241 15.555614
## dasc            -0.017848494 0.004029852 -4.429069 0.003410110 -5.233993
## infidelYes       0.505261009 0.144164750  3.504747 0.119537471  4.226800
## genderHusband   -0.203380939 0.082501523 -2.465178 0.075600774 -2.690197
## afcc             0.016503411 0.008203429  2.011770 0.007672575  2.150961
## sexc             0.008513665 0.005466519  1.557420 0.005214695  1.632629
## dasc:infidelYes  0.016748315 0.007282451  2.299818 0.006527212  2.565922
## 
## Estimated Scale Parameter:  1.958107
## Number of Iterations:  3
## 
## Working Correlation
##           [,1]      [,2]
## [1,] 1.0000000 0.3356346
## [2,] 0.3356346 1.0000000

#glmm
summary(r4_2 <- glmmadmb(msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel, random = ~ 1 | couple, data = dta4, 
                         family = "nbinom"))

## 
## Call:
## glmmadmb(formula = msi ~ dasc + infidel + gender + afcc + sexc + 
##     dasc:infidel, data = dta4, family = "nbinom", random = ~1 | 
##     couple)
## 
## AIC: 1154 
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      0.98453    0.08235   11.96  < 2e-16 ***
## dasc            -0.01984    0.00425   -4.66  3.1e-06 ***
## infidelYes       0.52275    0.17290    3.02   0.0025 ** 
## genderHusband   -0.20499    0.07984   -2.57   0.0102 *  
## afcc             0.01601    0.00829    1.93   0.0534 .  
## sexc             0.00933    0.00546    1.71   0.0877 .  
## dasc:infidelYes  0.01668    0.00810    2.06   0.0395 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of observations: total=263, couple=133 
## Random effect variance(s):
## Group=couple
##             Variance StdDev
## (Intercept)   0.2853 0.5342
## 
## Negative binomial dispersion parameter: 22.856 (std. err.: 31.389)
## 
## Log-likelihood: -567.975

dta4$p_gee <- fitted(r4_1)
dta4$p_glmm <- fitted(r4_2)


#
ggplot(data = dta4, aes(x = dasc, y = msi, col = gender))+
  geom_point()+
  stat_smooth(method = "lm", se = F)+
  stat_smooth(aes(y = p_gee), lty = "dotted", method = "lm", se = F)+
  stat_smooth(aes(y = p_glmm), lty = "dashed",method = "lm", se = F)+
  facet_grid(~infidel)+
  labs(x = "Dyadic Adjustment Scale (center)", y = "Marital Status Inventory")+
  theme_bw()

#
ggplot(data = dta4, aes(x = afcc, y = msi, col = gender))+
  geom_point()+
  stat_smooth(method = "lm", se = F)+
  stat_smooth(aes(y = p_gee), lty = "dotted", method = "lm", se = F)+
  stat_smooth(aes(y = p_glmm), lty = "dashed",method = "lm", se = F)+
  facet_grid(~infidel)+
  labs(x = "Affective communication (center)", y = "Marital Status Inventory")+
  theme_bw()

#
ggplot(data = dta4, aes(x = sexc, y = msi, col = gender))+
  geom_point()+
  stat_smooth(method = "lm", se = F)+
  stat_smooth(aes(y = p_gee), lty = "dotted", method = "lm", se = F)+
  stat_smooth(aes(y = p_glmm), lty = "dashed",method = "lm", se = F)+
  facet_grid(~infidel)+
  labs(x = "Sexual dyssatisfaction (center)", y = "Marital Status Inventory")+
  theme_bw()

LLM_GLMM

WEN_SUNG

2016年12月5日

library

Question 1

Question 2

Question 3

Question 4