#peijun

Refer to the questions here.

Q1 Alcoholic problem (zero-inflated poisson vs negbin)

dta <- read.csv("rapi.csv")
dta$YearC <- (dta$Month - mean(unique(dta$Month))) / 12
dta %>%
  group_by(Gender, Month) %>%
  summarize( mean(RAPI), var(RAPI), sum(RAPI < 1)/n()) %>%
  as.data.frame
   Gender Month mean(RAPI) var(RAPI) sum(RAPI < 1)/n()
1     Men     0   7.700288  73.80587        0.11815562
2     Men     6   8.214744 106.43284        0.14423077
3     Men    12   8.039146 130.83060        0.18149466
4     Men    18   8.107914 165.65618        0.22661871
5     Men    24   7.294776 129.84162        0.25373134
6   Women     0   6.335456  49.15957        0.09766454
7   Women     6   5.300683  45.99614        0.19817768
8   Women    12   4.632319  41.38327        0.25058548
9   Women    18   4.990099  62.33737        0.27722772
10  Women    24   4.652956  79.29420        0.34961440
# Compare distributions with different counts
ggplot(dta, aes(RAPI, ..density..)) +
  stat_bin(binwidth = 0.9) +
  facet_grid(Gender ~ as.factor(Month)) +
  labs(x = "Rutgers Alcohol Problem Index") +
  theme_set(theme_bw())

# negative binomial, zero-inflated negative binomial
mnb1 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
                        data = dta, family = nbinom1)

# variance to mean with a quadratic trend
mnb2 <- update(mnb1, . ~ ., family = nbinom2)

# Include zero-inflation term
mznb2 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
                 zi = ~ Gender + YearC,
                 data = dta, family = nbinom2)

# add dispersion over time
mznb2a <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID), data = dta,
                  zi = ~ YearC,
                  dispformula = ~ YearC,
                  family = nbinom2)
###
# zero-inflated poisson
m1 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID),
              zi = ~ Gender + YearC,
              data = dta, family = poisson)


# model comparisions - across different models
AICtab(mnb1, mnb2, mznb2, mznb2a, m1, base = T, logLik = T)
       logLik   AIC      dLogLik  dAIC     df
mznb2a  -9634.5  19291.0    598.4      0.0 11
mznb2   -9636.6  19297.2    596.3      6.2 12
mnb2    -9686.8  19391.5    546.1    100.6 9 
mnb1    -9699.0  19416.0    533.9    125.0 9 
m1     -10232.9  20485.7      0.0   1194.8 10

Q2 adolescent smoking

dta2 <- read.table("http://140.116.183.121/~sheu/lmm/Data/adolescentSmoking.txt", h = T)
dta2 <- dta2 %>% mutate(sex = factor(sex), parsmk = factor(parsmk), timeC = scale(wave), wave = factor(wave)) %>%
          group_by(sex, wave) %>%
          mutate(smkregM = mean(smkreg)) %>%
          as.data.frame

#plot
ggplot(dta2, aes(wave, smkregM, group = sex, linetype = sex)) +
  geom_point() +
  geom_line() +
  ylim(0, 0.2) +
  labs(x = "wave of study", y = "proportion of smokers")

#model
m0 <- glmer(smkreg ~ sex + parsmk + wave + (1| newid), data = dta2,
            family = binomial)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
$checkConv, : Model failed to converge with max|grad| = 4.75739 (tol =
0.001, component 1)
summary(m0)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: smkreg ~ sex + parsmk + wave + (1 | newid)
   Data: dta2

     AIC      BIC   logLik deviance df.resid 
  3803.4   3867.1  -1892.7   3785.4     8721 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.4829 -0.0328 -0.0209 -0.0144  4.2244 

Random effects:
 Groups Name        Variance Std.Dev.
 newid  (Intercept) 40.98    6.402   
Number of obs: 8730, groups:  newid, 1760

Fixed effects:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -8.0360     0.3549 -22.641  < 2e-16 ***
sex1          0.2196     0.2694   0.815 0.414994    
parsmk1       1.1726     0.2761   4.247 2.17e-05 ***
wave2        -0.8710     0.2454  -3.550 0.000385 ***
wave3        -0.3562     0.2394  -1.488 0.136722    
wave4         0.1828     0.2352   0.777 0.437006    
wave5         0.6205     0.2339   2.653 0.007969 ** 
wave6         1.1000     0.2329   4.722 2.34e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
        (Intr) sex1   prsmk1 wave2  wave3  wave4  wave5 
sex1    -0.415                                          
parsmk1 -0.261  0.033                                   
wave2   -0.381 -0.009  0.011                            
wave3   -0.422 -0.007  0.009  0.642                     
wave4   -0.464 -0.008  0.008  0.649  0.667              
wave5   -0.504 -0.007  0.007  0.647  0.670  0.693       
wave6   -0.548 -0.012  0.005  0.639  0.666  0.693  0.711
convergence code: 0
Model failed to converge with max|grad| = 4.75739 (tol = 0.001, component 1)
m1 <- glmer(smkreg ~ sex + parsmk + timeC + (1| newid), data = dta2,
            family = binomial)
summary(m1)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: smkreg ~ sex + parsmk + timeC + (1 | newid)
   Data: dta2

     AIC      BIC   logLik deviance df.resid 
  3693.7   3729.1  -1841.8   3683.7     8725 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.8966 -0.0155 -0.0093 -0.0053  5.5417 

Random effects:
 Groups Name        Variance Std.Dev.
 newid  (Intercept) 95.78    9.787   
Number of obs: 8730, groups:  newid, 1760

Fixed effects:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  -9.5980     0.3675 -26.120  < 2e-16 ***
sex1          0.1196     0.3326   0.360  0.71904    
parsmk1       1.0863     0.3333   3.259  0.00112 ** 
timeC         0.9090     0.0710  12.803  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
        (Intr) sex1   prsmk1
sex1    -0.501              
parsmk1 -0.387  0.028       
timeC   -0.262 -0.006 -0.005

Q3 couple commitment

dta3 <- read.table("http://140.116.183.121/~sheu/lmm/Data/coupleCommitment.txt", h =T)
dta3 <- dta3 %>% mutate(couple = factor(couple), gender = factor(gender, labels = c("Wife", "Husband")), infidel = factor(infidel, labels = c("No", "Yes")))

# plot
ggplot(dta3, aes(x = msi)) +
  geom_histogram(binwidth = 1) +
  facet_grid(infidel ~ gender) +
  labs(x = "Marital Status Inventory")

ggplot(dta3, aes(x = dasc, y = msi, color = gender)) +
  geom_point() +
  stat_smooth(method = "lm", se = F) +
  facet_wrap(~infidel) +
  labs(x = "Dyadic adjustment scale - centered", y = "Marital Status Inventory")

ggplot(dta3, aes(x = dasc, y = msi, color = gender)) +
  geom_point() +
  stat_smooth(method = "lm", se = F) +
  facet_wrap(~infidel) +
  labs(x = "Dyadic adjustment scale - centered", y = "Marital Status Inventory")

ggplot(dta3, aes(x = afcc, y = msi, color = gender)) +
  geom_point() +
  stat_smooth(method = "lm", se = T) +
  facet_wrap(~infidel) +
  labs(x = "Affection communication scale - centered", y = "Marital Status Inventory")

ggplot(dta3, aes(x = sexc, y = msi, color = gender)) +
  geom_point() +
  stat_smooth(method = "lm", se = T) +
  facet_wrap(~infidel) +
  labs(x = "Sexual dyssatisfaction scale - centered", y = "Marital Status Inventory")

# model
m0 <- gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel,
          id = couple, data = dta3, family = poisson, corstr = "exchangeable")
Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
running glm to get initial regression estimate
    (Intercept)            dasc      infidelYes   genderHusband 
    1.127447584    -0.017746288     0.525803868    -0.211613976 
           afcc            sexc dasc:infidelYes 
    0.020641206     0.006026088     0.020016627 
summary(m0)

 GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
 gee S-function, version 4.13 modified 98/01/27 (1998) 

Model:
 Link:                      Logarithm 
 Variance to Mean Relation: Poisson 
 Correlation Structure:     Exchangeable 

Call:
gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel, 
    id = couple, data = dta3, family = poisson, corstr = "exchangeable")

Summary of Residuals:
       Min         1Q     Median         3Q        Max 
-6.0738805 -1.7362610 -0.2607464  1.5689169  6.0691957 


Coefficients:
                    Estimate  Naive S.E.   Naive z Robust S.E.  Robust z
(Intercept)      1.122761266 0.073815193 15.210436 0.072177241 15.555614
dasc            -0.017848494 0.004029852 -4.429069 0.003410110 -5.233993
infidelYes       0.505261009 0.144164750  3.504747 0.119537471  4.226800
genderHusband   -0.203380939 0.082501523 -2.465178 0.075600774 -2.690197
afcc             0.016503411 0.008203429  2.011770 0.007672575  2.150961
sexc             0.008513665 0.005466519  1.557420 0.005214695  1.632629
dasc:infidelYes  0.016748315 0.007282451  2.299818 0.006527212  2.565922

Estimated Scale Parameter:  1.958107
Number of Iterations:  3

Working Correlation
          [,1]      [,2]
[1,] 1.0000000 0.3356346
[2,] 0.3356346 1.0000000

Q4 geometry : A-level

dta4 <- read.table("http://140.116.183.121/~sheu/lmm/Data/geometryAL.txt", h = T)
dta4 <- dta4 %>% mutate(board = factor(board), gender = factor(gender), itype = factor(itype))

#plot
ggplot(dta4, aes(x = score)) +
  geom_histogram(aes(y = ..density..), binwidth = 1) +
  facet_grid(itype ~ board) +
  labs(x = "Geometry score")

ggplot(dta4, aes(x = score, fill = gender)) +
  geom_bar(position = "dodge") +
  labs(x = "Geometry score")

ggplot(dta4, aes(x = mgcse, y = score)) +
  geom_point(alpha = .3, size = .5) +
  stat_smooth(method = "glm") +
  facet_grid(itype ~ gender) +
  labs(x = "GCSE score")

#
dta4 <- dta4 %>% mutate(score = factor(score))
table(dta4$score)

   0    2    4    6    8   10 
3246 4271 6297 7885 7407 4170 
# model
#summary(m0 <- clmm(score ~ gender + age + mgcse + board + itype + (1 | iid), data = dta4))

#summary(m1 <- clmm(score ~ gender + age + board + itype + (1 | idd), data = dta4))