1127_glmm

#peijun

Refer to the questions here.

Q1 Alcoholic problem (zero-inflated poisson vs negbin)

dta <- read.csv("rapi.csv") dta$YearC <- (dta$Month - mean(unique(dta$Month))) / 12 dta %>% group_by(Gender, Month) %>% summarize( mean(RAPI), var(RAPI), sum(RAPI < 1)/n()) %>% as.data.frame

Gender Month mean(RAPI) var(RAPI) sum(RAPI < 1)/n() 1 Men 0 7.700288 73.80587 0.11815562 2 Men 6 8.214744 106.43284 0.14423077 3 Men 12 8.039146 130.83060 0.18149466 4 Men 18 8.107914 165.65618 0.22661871 5 Men 24 7.294776 129.84162 0.25373134 6 Women 0 6.335456 49.15957 0.09766454 7 Women 6 5.300683 45.99614 0.19817768 8 Women 12 4.632319 41.38327 0.25058548 9 Women 18 4.990099 62.33737 0.27722772 10 Women 24 4.652956 79.29420 0.34961440

# Compare distributions with different counts ggplot(dta, aes(RAPI, ..density..)) + stat_bin(binwidth = 0.9) + facet_grid(Gender ~ as.factor(Month)) + labs(x = "Rutgers Alcohol Problem Index") + theme_set(theme_bw())

# negative binomial, zero-inflated negative binomial mnb1 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID), data = dta, family = nbinom1) # variance to mean with a quadratic trend mnb2 <- update(mnb1, . ~ ., family = nbinom2) # Include zero-inflation term mznb2 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID), zi = ~ Gender + YearC, data = dta, family = nbinom2) # add dispersion over time mznb2a <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID), data = dta, zi = ~ YearC, dispformula = ~ YearC, family = nbinom2) ### # zero-inflated poisson m1 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID), zi = ~ Gender + YearC, data = dta, family = poisson) # model comparisions - across different models AICtab(mnb1, mnb2, mznb2, mznb2a, m1, base = T, logLik = T)

logLik AIC dLogLik dAIC df mznb2a -9634.5 19291.0 598.4 0.0 11 mznb2 -9636.6 19297.2 596.3 6.2 12 mnb2 -9686.8 19391.5 546.1 100.6 9 mnb1 -9699.0 19416.0 533.9 125.0 9 m1 -10232.9 20485.7 0.0 1194.8 10

Q2 adolescent smoking

dta2 <- read.table("http://140.116.183.121/~sheu/lmm/Data/adolescentSmoking.txt", h = T) dta2 <- dta2 %>% mutate(sex = factor(sex), parsmk = factor(parsmk), timeC = scale(wave), wave = factor(wave))　%>% group_by(sex, wave) %>% mutate(smkregM = mean(smkreg)) %>% as.data.frame #plot ggplot(dta2, aes(wave, smkregM, group = sex, linetype = sex)) + geom_point() + geom_line() + ylim(0, 0.2) + labs(x = "wave of study", y = "proportion of smokers")

#model m0 <- glmer(smkreg ~ sex + parsmk + wave + (1| newid), data = dta2, family = binomial)

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control $checkConv, : Model failed to converge with max|grad| = 4.75739 (tol = 0.001, component 1)

summary(m0)

Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [glmerMod] Family: binomial ( logit ) Formula: smkreg ~ sex + parsmk + wave + (1 | newid) Data: dta2 AIC BIC logLik deviance df.resid 3803.4 3867.1 -1892.7 3785.4 8721 Scaled residuals: Min 1Q Median 3Q Max -3.4829 -0.0328 -0.0209 -0.0144 4.2244 Random effects: Groups Name Variance Std.Dev. newid (Intercept) 40.98 6.402 Number of obs: 8730, groups: newid, 1760 Fixed effects: Estimate Std. Error z value Pr(>|z|) (Intercept) -8.0360 0.3549 -22.641 < 2e-16 *** sex1 0.2196 0.2694 0.815 0.414994 parsmk1 1.1726 0.2761 4.247 2.17e-05 *** wave2 -0.8710 0.2454 -3.550 0.000385 *** wave3 -0.3562 0.2394 -1.488 0.136722 wave4 0.1828 0.2352 0.777 0.437006 wave5 0.6205 0.2339 2.653 0.007969 ** wave6 1.1000 0.2329 4.722 2.34e-06 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Correlation of Fixed Effects: (Intr) sex1 prsmk1 wave2 wave3 wave4 wave5 sex1 -0.415 parsmk1 -0.261 0.033 wave2 -0.381 -0.009 0.011 wave3 -0.422 -0.007 0.009 0.642 wave4 -0.464 -0.008 0.008 0.649 0.667 wave5 -0.504 -0.007 0.007 0.647 0.670 0.693 wave6 -0.548 -0.012 0.005 0.639 0.666 0.693 0.711 convergence code: 0 Model failed to converge with max|grad| = 4.75739 (tol = 0.001, component 1)

m1 <- glmer(smkreg ~ sex + parsmk + timeC + (1| newid), data = dta2, family = binomial) summary(m1)

Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) [glmerMod] Family: binomial ( logit ) Formula: smkreg ~ sex + parsmk + timeC + (1 | newid) Data: dta2 AIC BIC logLik deviance df.resid 3693.7 3729.1 -1841.8 3683.7 8725 Scaled residuals: Min 1Q Median 3Q Max -4.8966 -0.0155 -0.0093 -0.0053 5.5417 Random effects: Groups Name Variance Std.Dev. newid (Intercept) 95.78 9.787 Number of obs: 8730, groups: newid, 1760 Fixed effects: Estimate Std. Error z value Pr(>|z|) (Intercept) -9.5980 0.3675 -26.120 < 2e-16 *** sex1 0.1196 0.3326 0.360 0.71904 parsmk1 1.0863 0.3333 3.259 0.00112 ** timeC 0.9090 0.0710 12.803 < 2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Correlation of Fixed Effects: (Intr) sex1 prsmk1 sex1 -0.501 parsmk1 -0.387 0.028 timeC -0.262 -0.006 -0.005

Q3 couple commitment

dta3 <- read.table("http://140.116.183.121/~sheu/lmm/Data/coupleCommitment.txt", h =T) dta3 <- dta3 %>% mutate(couple = factor(couple), gender = factor(gender, labels = c("Wife", "Husband")), infidel = factor(infidel, labels = c("No", "Yes"))) # plot ggplot(dta3, aes(x = msi)) + geom_histogram(binwidth = 1) + facet_grid(infidel ~ gender) + labs(x = "Marital Status Inventory")

ggplot(dta3, aes(x = dasc, y = msi, color = gender)) + geom_point() + stat_smooth(method = "lm", se = F) + facet_wrap(~infidel) + labs(x = "Dyadic adjustment scale - centered", y = "Marital Status Inventory")

ggplot(dta3, aes(x = dasc, y = msi, color = gender)) + geom_point() + stat_smooth(method = "lm", se = F) + facet_wrap(~infidel) + labs(x = "Dyadic adjustment scale - centered", y = "Marital Status Inventory")

ggplot(dta3, aes(x = afcc, y = msi, color = gender)) + geom_point() + stat_smooth(method = "lm", se = T) + facet_wrap(~infidel) + labs(x = "Affection communication scale - centered", y = "Marital Status Inventory")

ggplot(dta3, aes(x = sexc, y = msi, color = gender)) + geom_point() + stat_smooth(method = "lm", se = T) + facet_wrap(~infidel) + labs(x = "Sexual dyssatisfaction scale - centered", y = "Marital Status Inventory")

# model m0 <- gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel, id = couple, data = dta3, family = poisson, corstr = "exchangeable")

Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27

running glm to get initial regression estimate

(Intercept) dasc infidelYes genderHusband 1.127447584 -0.017746288 0.525803868 -0.211613976 afcc sexc dasc:infidelYes 0.020641206 0.006026088 0.020016627

summary(m0)

GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA gee S-function, version 4.13 modified 98/01/27 (1998) Model: Link: Logarithm Variance to Mean Relation: Poisson Correlation Structure: Exchangeable Call: gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel, id = couple, data = dta3, family = poisson, corstr = "exchangeable") Summary of Residuals: Min 1Q Median 3Q Max -6.0738805 -1.7362610 -0.2607464 1.5689169 6.0691957 Coefficients: Estimate Naive S.E. Naive z Robust S.E. Robust z (Intercept) 1.122761266 0.073815193 15.210436 0.072177241 15.555614 dasc -0.017848494 0.004029852 -4.429069 0.003410110 -5.233993 infidelYes 0.505261009 0.144164750 3.504747 0.119537471 4.226800 genderHusband -0.203380939 0.082501523 -2.465178 0.075600774 -2.690197 afcc 0.016503411 0.008203429 2.011770 0.007672575 2.150961 sexc 0.008513665 0.005466519 1.557420 0.005214695 1.632629 dasc:infidelYes 0.016748315 0.007282451 2.299818 0.006527212 2.565922 Estimated Scale Parameter: 1.958107 Number of Iterations: 3 Working Correlation [,1] [,2] [1,] 1.0000000 0.3356346 [2,] 0.3356346 1.0000000

Q4 geometry : A-level

dta4 <- read.table("http://140.116.183.121/~sheu/lmm/Data/geometryAL.txt", h = T) dta4 <- dta4 %>% mutate(board = factor(board), gender = factor(gender), itype = factor(itype)) #plot ggplot(dta4, aes(x = score)) + geom_histogram(aes(y = ..density..), binwidth = 1) + facet_grid(itype ~ board) + labs(x = "Geometry score")

ggplot(dta4, aes(x = score, fill = gender)) + geom_bar(position = "dodge") + labs(x = "Geometry score")

ggplot(dta4, aes(x = mgcse, y = score)) + geom_point(alpha = .3, size = .5) + stat_smooth(method = "glm") + facet_grid(itype ~ gender) + labs(x = "GCSE score")

# dta4 <- dta4 %>% mutate(score = factor(score)) table(dta4$score)

0 2 4 6 8 10 3246 4271 6297 7885 7407 4170

# model #summary(m0 <- clmm(score ~ gender + age + mgcse + board + itype + (1 | iid), data = dta4)) #summary(m1 <- clmm(score ~ gender + age + board + itype + (1 | idd), data = dta4))