#peijun
Refer to the questions here.
dta <- read.csv("rapi.csv")
dta$YearC <- (dta$Month - mean(unique(dta$Month))) / 12
dta %>%
group_by(Gender, Month) %>%
summarize( mean(RAPI), var(RAPI), sum(RAPI < 1)/n()) %>%
as.data.frame
Gender Month mean(RAPI) var(RAPI) sum(RAPI < 1)/n()
1 Men 0 7.700288 73.80587 0.11815562
2 Men 6 8.214744 106.43284 0.14423077
3 Men 12 8.039146 130.83060 0.18149466
4 Men 18 8.107914 165.65618 0.22661871
5 Men 24 7.294776 129.84162 0.25373134
6 Women 0 6.335456 49.15957 0.09766454
7 Women 6 5.300683 45.99614 0.19817768
8 Women 12 4.632319 41.38327 0.25058548
9 Women 18 4.990099 62.33737 0.27722772
10 Women 24 4.652956 79.29420 0.34961440
# Compare distributions with different counts
ggplot(dta, aes(RAPI, ..density..)) +
stat_bin(binwidth = 0.9) +
facet_grid(Gender ~ as.factor(Month)) +
labs(x = "Rutgers Alcohol Problem Index") +
theme_set(theme_bw())
# negative binomial, zero-inflated negative binomial
mnb1 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
data = dta, family = nbinom1)
# variance to mean with a quadratic trend
mnb2 <- update(mnb1, . ~ ., family = nbinom2)
# Include zero-inflation term
mznb2 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID) + (1 | ID:Obs_ID),
zi = ~ Gender + YearC,
data = dta, family = nbinom2)
# add dispersion over time
mznb2a <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID), data = dta,
zi = ~ YearC,
dispformula = ~ YearC,
family = nbinom2)
###
# zero-inflated poisson
m1 <- glmmTMB(RAPI ~ Gender*YearC + (YearC | ID),
zi = ~ Gender + YearC,
data = dta, family = poisson)
# model comparisions - across different models
AICtab(mnb1, mnb2, mznb2, mznb2a, m1, base = T, logLik = T)
logLik AIC dLogLik dAIC df
mznb2a -9634.5 19291.0 598.4 0.0 11
mznb2 -9636.6 19297.2 596.3 6.2 12
mnb2 -9686.8 19391.5 546.1 100.6 9
mnb1 -9699.0 19416.0 533.9 125.0 9
m1 -10232.9 20485.7 0.0 1194.8 10
dta2 <- read.table("http://140.116.183.121/~sheu/lmm/Data/adolescentSmoking.txt", h = T)
dta2 <- dta2 %>% mutate(sex = factor(sex), parsmk = factor(parsmk), timeC = scale(wave), wave = factor(wave)) %>%
group_by(sex, wave) %>%
mutate(smkregM = mean(smkreg)) %>%
as.data.frame
#plot
ggplot(dta2, aes(wave, smkregM, group = sex, linetype = sex)) +
geom_point() +
geom_line() +
ylim(0, 0.2) +
labs(x = "wave of study", y = "proportion of smokers")
#model
m0 <- glmer(smkreg ~ sex + parsmk + wave + (1| newid), data = dta2,
family = binomial)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
$checkConv, : Model failed to converge with max|grad| = 4.75739 (tol =
0.001, component 1)
summary(m0)
Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: binomial ( logit )
Formula: smkreg ~ sex + parsmk + wave + (1 | newid)
Data: dta2
AIC BIC logLik deviance df.resid
3803.4 3867.1 -1892.7 3785.4 8721
Scaled residuals:
Min 1Q Median 3Q Max
-3.4829 -0.0328 -0.0209 -0.0144 4.2244
Random effects:
Groups Name Variance Std.Dev.
newid (Intercept) 40.98 6.402
Number of obs: 8730, groups: newid, 1760
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -8.0360 0.3549 -22.641 < 2e-16 ***
sex1 0.2196 0.2694 0.815 0.414994
parsmk1 1.1726 0.2761 4.247 2.17e-05 ***
wave2 -0.8710 0.2454 -3.550 0.000385 ***
wave3 -0.3562 0.2394 -1.488 0.136722
wave4 0.1828 0.2352 0.777 0.437006
wave5 0.6205 0.2339 2.653 0.007969 **
wave6 1.1000 0.2329 4.722 2.34e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) sex1 prsmk1 wave2 wave3 wave4 wave5
sex1 -0.415
parsmk1 -0.261 0.033
wave2 -0.381 -0.009 0.011
wave3 -0.422 -0.007 0.009 0.642
wave4 -0.464 -0.008 0.008 0.649 0.667
wave5 -0.504 -0.007 0.007 0.647 0.670 0.693
wave6 -0.548 -0.012 0.005 0.639 0.666 0.693 0.711
convergence code: 0
Model failed to converge with max|grad| = 4.75739 (tol = 0.001, component 1)
m1 <- glmer(smkreg ~ sex + parsmk + timeC + (1| newid), data = dta2,
family = binomial)
summary(m1)
Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: binomial ( logit )
Formula: smkreg ~ sex + parsmk + timeC + (1 | newid)
Data: dta2
AIC BIC logLik deviance df.resid
3693.7 3729.1 -1841.8 3683.7 8725
Scaled residuals:
Min 1Q Median 3Q Max
-4.8966 -0.0155 -0.0093 -0.0053 5.5417
Random effects:
Groups Name Variance Std.Dev.
newid (Intercept) 95.78 9.787
Number of obs: 8730, groups: newid, 1760
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -9.5980 0.3675 -26.120 < 2e-16 ***
sex1 0.1196 0.3326 0.360 0.71904
parsmk1 1.0863 0.3333 3.259 0.00112 **
timeC 0.9090 0.0710 12.803 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) sex1 prsmk1
sex1 -0.501
parsmk1 -0.387 0.028
timeC -0.262 -0.006 -0.005
dta3 <- read.table("http://140.116.183.121/~sheu/lmm/Data/coupleCommitment.txt", h =T)
dta3 <- dta3 %>% mutate(couple = factor(couple), gender = factor(gender, labels = c("Wife", "Husband")), infidel = factor(infidel, labels = c("No", "Yes")))
# plot
ggplot(dta3, aes(x = msi)) +
geom_histogram(binwidth = 1) +
facet_grid(infidel ~ gender) +
labs(x = "Marital Status Inventory")
ggplot(dta3, aes(x = dasc, y = msi, color = gender)) +
geom_point() +
stat_smooth(method = "lm", se = F) +
facet_wrap(~infidel) +
labs(x = "Dyadic adjustment scale - centered", y = "Marital Status Inventory")
ggplot(dta3, aes(x = dasc, y = msi, color = gender)) +
geom_point() +
stat_smooth(method = "lm", se = F) +
facet_wrap(~infidel) +
labs(x = "Dyadic adjustment scale - centered", y = "Marital Status Inventory")
ggplot(dta3, aes(x = afcc, y = msi, color = gender)) +
geom_point() +
stat_smooth(method = "lm", se = T) +
facet_wrap(~infidel) +
labs(x = "Affection communication scale - centered", y = "Marital Status Inventory")
ggplot(dta3, aes(x = sexc, y = msi, color = gender)) +
geom_point() +
stat_smooth(method = "lm", se = T) +
facet_wrap(~infidel) +
labs(x = "Sexual dyssatisfaction scale - centered", y = "Marital Status Inventory")
# model
m0 <- gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel,
id = couple, data = dta3, family = poisson, corstr = "exchangeable")
Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
running glm to get initial regression estimate
(Intercept) dasc infidelYes genderHusband
1.127447584 -0.017746288 0.525803868 -0.211613976
afcc sexc dasc:infidelYes
0.020641206 0.006026088 0.020016627
summary(m0)
GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
gee S-function, version 4.13 modified 98/01/27 (1998)
Model:
Link: Logarithm
Variance to Mean Relation: Poisson
Correlation Structure: Exchangeable
Call:
gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel,
id = couple, data = dta3, family = poisson, corstr = "exchangeable")
Summary of Residuals:
Min 1Q Median 3Q Max
-6.0738805 -1.7362610 -0.2607464 1.5689169 6.0691957
Coefficients:
Estimate Naive S.E. Naive z Robust S.E. Robust z
(Intercept) 1.122761266 0.073815193 15.210436 0.072177241 15.555614
dasc -0.017848494 0.004029852 -4.429069 0.003410110 -5.233993
infidelYes 0.505261009 0.144164750 3.504747 0.119537471 4.226800
genderHusband -0.203380939 0.082501523 -2.465178 0.075600774 -2.690197
afcc 0.016503411 0.008203429 2.011770 0.007672575 2.150961
sexc 0.008513665 0.005466519 1.557420 0.005214695 1.632629
dasc:infidelYes 0.016748315 0.007282451 2.299818 0.006527212 2.565922
Estimated Scale Parameter: 1.958107
Number of Iterations: 3
Working Correlation
[,1] [,2]
[1,] 1.0000000 0.3356346
[2,] 0.3356346 1.0000000
dta4 <- read.table("http://140.116.183.121/~sheu/lmm/Data/geometryAL.txt", h = T)
dta4 <- dta4 %>% mutate(board = factor(board), gender = factor(gender), itype = factor(itype))
#plot
ggplot(dta4, aes(x = score)) +
geom_histogram(aes(y = ..density..), binwidth = 1) +
facet_grid(itype ~ board) +
labs(x = "Geometry score")
ggplot(dta4, aes(x = score, fill = gender)) +
geom_bar(position = "dodge") +
labs(x = "Geometry score")
ggplot(dta4, aes(x = mgcse, y = score)) +
geom_point(alpha = .3, size = .5) +
stat_smooth(method = "glm") +
facet_grid(itype ~ gender) +
labs(x = "GCSE score")
#
dta4 <- dta4 %>% mutate(score = factor(score))
table(dta4$score)
0 2 4 6 8 10
3246 4271 6297 7885 7407 4170
# model
#summary(m0 <- clmm(score ~ gender + age + mgcse + board + itype + (1 | iid), data = dta4))
#summary(m1 <- clmm(score ~ gender + age + board + itype + (1 | idd), data = dta4))