Q1 problematic drinking in college students
## problematic drinking in college students. Alcohol-related problems
#were recorded across 2 years (5 time points) for a sample of 881 students.
#log(E[RAPIit]) = b0i + β1 genderi + β2 monthit + β3 genderi × monthit + b2i monthit
#E[RAPIit] is the expected number of alcohol-related problems measured by RAPI for a student i at time t given gender
#Poisson distribution
p = c("tidyverse", "magrittr", "lme4", "ordinal", "vcrpart","glmmTMB")
lapply(p, library, character.only = TRUE)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
##
## Attaching package: 'ordinal'
## The following objects are masked from 'package:lme4':
##
## ranef, VarCorr
## The following object is masked from 'package:dplyr':
##
## slice
## Loading required package: parallel
## Loading required package: partykit
## Loading required package: grid
##
## Attaching package: 'vcrpart'
## The following objects are masked from 'package:ordinal':
##
## ranef, VarCorr
## The following object is masked from 'package:magrittr':
##
## extract
## The following object is masked from 'package:tidyr':
##
## extract
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.12
## Current Matrix version is 1.2.11
## Please re-install 'TMB' from source or restore original 'Matrix' package
##
## Attaching package: 'glmmTMB'
## The following objects are masked from 'package:ordinal':
##
## ranef, VarCorr
## [[1]]
## [1] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [6] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [11] "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "magrittr" "dplyr" "purrr" "readr" "tidyr"
## [6] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [11] "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "lme4" "Matrix" "magrittr" "dplyr" "purrr"
## [6] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [11] "stats" "graphics" "grDevices" "utils" "datasets"
## [16] "methods" "base"
##
## [[4]]
## [1] "ordinal" "lme4" "Matrix" "magrittr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
##
## [[5]]
## [1] "vcrpart" "partykit" "grid" "parallel" "ordinal"
## [6] "lme4" "Matrix" "magrittr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "glmmTMB" "vcrpart" "partykit" "grid" "parallel"
## [6] "ordinal" "lme4" "Matrix" "magrittr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
dat= read.table("http://140.116.183.121/~sheu/lmm/Data/rapi.csv",sep = ',', h = T)
str(dat)
## 'data.frame': 3616 obs. of 5 variables:
## $ ID : Factor w/ 818 levels "S1001","S1002",..: 1 1 1 2 2 2 2 2 3 3 ...
## $ Obs_ID: int 1 2 3 1 2 3 4 5 1 2 ...
## $ Gender: Factor w/ 2 levels "Men","Women": 1 1 1 2 2 2 2 2 1 1 ...
## $ Month : int 0 6 18 0 6 12 18 24 0 12 ...
## $ RAPI : int 0 0 0 3 6 5 4 5 9 1 ...
# set plot theme
ot <- theme_set(theme_bw())
#Plot
ggplot(dat, aes(x = RAPI)) +
geom_histogram(aes(y =..density..), binwidth = .5)+
facet_grid(Gender~Month)+
labs(x = "RAPI", y = "Density")

#Plot
ggplot(dat, aes(x =Month, y=RAPI, group = ID)) +
geom_point(alpha = .3) +
geom_line(size= .1 , alpha = .2) +
stat_smooth(aes(group = 1), method = "glm", se = F) +
facet_grid(~Gender)+
labs(x = "Month", y ="RAPI")

#每個人有所差異,男女之間也是
#plot 若使用poisson擬合,則每月平均的rapi分數與迴歸線會差多少
ggplot(dat, aes(x =Month, y=RAPI, color = Gender)) +
stat_summary(fun.data = "mean_se") +
stat_smooth(method = "glm", method.args = list(family = poisson)) +
labs(x = "Month", y ="RAPI") +
theme()

dat$YearC= dat$Month / 12
#Find a best-fitting zero-inflated Poisson model
summary(m0 <- glmmTMB(RAPI ~ Gender*YearC + (1 | ID),
data = dat, family = poisson))
## Family: poisson ( log )
## Formula: RAPI ~ Gender * YearC + (1 | ID)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 24094 24125 -12042 24084 3611
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## ID (Intercept) 1.04 1.02
## Number of obs: 3616, groups: ID, 818
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.56982 0.05822 26.965 < 2e-16 ***
## GenderWomen -0.22577 0.07696 -2.934 0.00335 **
## YearC -0.01429 0.01346 -1.062 0.28838
## GenderWomen:YearC -0.12145 0.01925 -6.311 2.78e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1 <- glmmTMB(RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID),
data = dat, family = poisson))
## Family: poisson ( log )
## Formula: RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 19447.9 19497.4 -9715.9 19431.9 3608
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## ID:Obs_ID (Intercept) 0.3765 0.6136
## ID (Intercept) 0.6777 0.8232
## YearC 0.3365 0.5801 0.11
## Number of obs: 3616, groups: ID:Obs_ID, 3616; ID, 818
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.58714 0.05688 27.904 < 2e-16 ***
## GenderWomen -0.20070 0.07445 -2.696 0.00702 **
## YearC -0.26393 0.04705 -5.609 2.03e-08 ***
## GenderWomen:YearC -0.19214 0.06120 -3.139 0.00169 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m2 <- glmmTMB(RAPI ~ Gender * YearC + (YearC - 1 | ID) + (1 | ID) + (1 | ID:Obs_ID),
data = dat, family = poisson))
## Family: poisson ( log )
## Formula:
## RAPI ~ Gender * YearC + (YearC - 1 | ID) + (1 | ID) + (1 | ID:Obs_ID)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 19448.1 19491.5 -9717.1 19434.1 3609
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## ID.Obs_ID (Intercept) 0.3688 0.6073
## ID (Intercept) 0.7215 0.8494
## ID.1 YearC 0.3586 0.5988
## Number of obs: 3616, groups: ID:Obs_ID, 3616; ID, 818
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.58187 0.05783 27.355 < 2e-16 ***
## GenderWomen -0.19980 0.07583 -2.635 0.00842 **
## YearC -0.25638 0.04741 -5.407 6.4e-08 ***
## GenderWomen:YearC -0.19333 0.06203 -3.117 0.00183 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bbmle::AICtab(m0, m1, m2, base=T, logLik = T)
## logLik AIC dLogLik dAIC df
## m1 -9715.9 19447.9 2326.1 0.0 8
## m2 -9717.1 19448.1 2325.0 0.2 7
## m0 -12042.0 24094.0 0.0 4646.1 5
summary(m3 <- glmmTMB(RAPI ~ Gender * YearC + (YearC | ID),
zi = ~ YearC,
data = dat, family = nbinom2))
## Family: nbinom2 ( log )
## Formula: RAPI ~ Gender * YearC + (YearC | ID)
## Zero inflation: ~YearC
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 19293.7 19355.7 -9636.9 19273.7 3606
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## ID (Intercept) 0.6598 0.8123
## YearC 0.2983 0.5462 0.15
## Number of obs: 3616, groups: ID, 818
##
## Overdispersion parameter for nbinom2 family (): 3.88
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.78328 0.05653 31.546 < 2e-16 ***
## GenderWomen -0.19028 0.07208 -2.640 0.008291 **
## YearC -0.22035 0.04693 -4.695 2.67e-06 ***
## GenderWomen:YearC -0.20228 0.05840 -3.464 0.000532 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Zero-inflation model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.3636 0.2638 -12.752 <2e-16 ***
## YearC 0.4283 0.1936 2.212 0.027 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Q2 adolescent amoking example
p = c("tidyverse", "magrittr", "lme4", "ordinal", "vcrpart")
lapply(p, library, character.only = TRUE)
## [[1]]
## [1] "glmmTMB" "vcrpart" "partykit" "grid" "parallel"
## [6] "ordinal" "lme4" "Matrix" "magrittr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[2]]
## [1] "glmmTMB" "vcrpart" "partykit" "grid" "parallel"
## [6] "ordinal" "lme4" "Matrix" "magrittr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[3]]
## [1] "glmmTMB" "vcrpart" "partykit" "grid" "parallel"
## [6] "ordinal" "lme4" "Matrix" "magrittr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[4]]
## [1] "glmmTMB" "vcrpart" "partykit" "grid" "parallel"
## [6] "ordinal" "lme4" "Matrix" "magrittr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[5]]
## [1] "glmmTMB" "vcrpart" "partykit" "grid" "parallel"
## [6] "ordinal" "lme4" "Matrix" "magrittr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##examine if the probability of smoking depends on sex, parental smoking and
#the wave of the study and how the probability of smoking varies from one individual adolescent to another.
dat = read.table("http://140.116.183.121/~sheu/lmm/Data/adolescentSmoking.txt", h = T)
f = c(1,2,3,4)
dat[,f] = lapply(dat[,f], as.factor)
dat$smkreg_f = factor(dat$smkreg)
#Plot
m = aggregate(smkreg~wave+sex, FUN = mean, data = dat)
ggplot(m, aes(x = wave, y = smkreg, group = sex, linetype = sex))+
geom_point()+
geom_line()+
theme_bw()+
ylim(0,.2)

#model
#smoking varies from one individual adolescent to another
summary(m<- glmer(smkreg_f~sex+parsmk+wave+(1|newid), family = "binomial", data = dat))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 4.75739 (tol =
## 0.001, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: smkreg_f ~ sex + parsmk + wave + (1 | newid)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 3803.4 3867.1 -1892.7 3785.4 8721
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4829 -0.0328 -0.0209 -0.0144 4.2244
##
## Random effects:
## Groups Name Variance Std.Dev.
## newid (Intercept) 40.98 6.402
## Number of obs: 8730, groups: newid, 1760
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.0360 0.3549 -22.641 < 2e-16 ***
## sex1 0.2196 0.2694 0.815 0.414994
## parsmk1 1.1726 0.2761 4.247 2.17e-05 ***
## wave2 -0.8710 0.2454 -3.550 0.000385 ***
## wave3 -0.3562 0.2394 -1.488 0.136722
## wave4 0.1828 0.2352 0.777 0.437006
## wave5 0.6205 0.2339 2.653 0.007969 **
## wave6 1.1000 0.2329 4.722 2.34e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sex1 prsmk1 wave2 wave3 wave4 wave5
## sex1 -0.415
## parsmk1 -0.261 0.033
## wave2 -0.381 -0.009 0.011
## wave3 -0.422 -0.007 0.009 0.642
## wave4 -0.464 -0.008 0.008 0.649 0.667
## wave5 -0.504 -0.007 0.007 0.647 0.670 0.693
## wave6 -0.548 -0.012 0.005 0.639 0.666 0.693 0.711
## convergence code: 0
## Model failed to converge with max|grad| = 4.75739 (tol = 0.001, component 1)
Q3 couple commitment example
##Use a generalized linear mixed model to analyze the couple commitment example.
dat = read.table("http://140.116.183.121/~sheu/lmm/Data/coupleCommitment.txt", h = T)
str(dat)
## 'data.frame': 263 obs. of 10 variables:
## $ couple : int 1 1 2 2 3 3 4 4 5 5 ...
## $ gender : int 0 1 0 1 0 1 0 1 0 1 ...
## $ msi : int 4 3 7 4 4 0 8 2 2 3 ...
## $ das : int 70 92 45 49 79 90 92 70 87 75 ...
## $ sex : int 61 64 69 68 49 56 66 68 42 56 ...
## $ afc : int 65 69 69 69 49 57 56 67 65 59 ...
## $ dasc : num -14.49 7.51 -39.49 -35.49 -5.49 ...
## $ sexc : num 0.932 3.932 8.932 7.932 -11.068 ...
## $ afcc : num 1.65 5.65 5.65 5.65 -14.35 ...
## $ infidel: int 0 0 0 0 0 0 0 0 0 0 ...
f = c(1,2,10)
dat[,f] = lapply(dat[,f], as.factor)
# recode factor levels
levels(dat$gender) <- c("Wife", "Husband")
levels(dat$infidel) <- c("no", "yes")
#Plot
ggplot(dat, aes(x = msi)) +
geom_histogram(binwidth=1)+
facet_grid(infidel~gender)+
labs(x = "Marital Status Inventory", y = "Count")

ggplot(dat, aes(x =dasc, y=msi, color = gender)) +
geom_point() +
stat_smooth(aes(group = gender), method = "glm", se = F) +
facet_grid(~infidel)+
labs(x = " Dyadic Adjustment Scale - centered", y ="Marital Status Inventory")

ggplot(dat, aes(x =dasc, y=msi, color = gender)) +
geom_point() +
stat_smooth(aes(group = gender), method = "glm", se = T) +
facet_grid(~infidel)+
labs(x = "Affective communication score - centered",y ="Marital Status Inventory")

ggplot(dat, aes(x =dasc, y=msi, color = gender)) +
geom_point() +
stat_smooth(aes(group = gender), method = "glm", se = T) +
facet_grid(~infidel)+
labs(x = "Sexsual dissatisfaction score- centered",y ="Marital Status Inventory")

##Log(μ(msi)ij) = α + β1 dasij + β2 infidelij + β3 genderj + β4 afcij + β5 sexdysij + β6 dasij×infidelij,
#where i is the couple idex, i = 1, 2, ..., 133; and j is the spouse index, j = 1,2;
#and the marital status inventory, msiij, is assumed to follow a Poisson distribution.
require(gee)
## Loading required package: gee
summary(m<-gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel,
id = couple, data = dat, family = poisson, corstr = "exchangeable"))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept) dasc infidelyes genderHusband
## 1.127447584 -0.017746288 0.525803868 -0.211613976
## afcc sexc dasc:infidelyes
## 0.020641206 0.006026088 0.020016627
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Logarithm
## Variance to Mean Relation: Poisson
## Correlation Structure: Exchangeable
##
## Call:
## gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel,
## id = couple, data = dat, family = poisson, corstr = "exchangeable")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -6.0738805 -1.7362610 -0.2607464 1.5689169 6.0691957
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E. Robust z
## (Intercept) 1.122761266 0.073815193 15.210436 0.072177241 15.555614
## dasc -0.017848494 0.004029852 -4.429069 0.003410110 -5.233993
## infidelyes 0.505261009 0.144164750 3.504747 0.119537471 4.226800
## genderHusband -0.203380939 0.082501523 -2.465178 0.075600774 -2.690197
## afcc 0.016503411 0.008203429 2.011770 0.007672575 2.150961
## sexc 0.008513665 0.005466519 1.557420 0.005214695 1.632629
## dasc:infidelyes 0.016748315 0.007282451 2.299818 0.006527212 2.565922
##
## Estimated Scale Parameter: 1.958107
## Number of Iterations: 3
##
## Working Correlation
## [,1] [,2]
## [1,] 1.0000000 0.3356346
## [2,] 0.3356346 1.0000000
Q4 Geometry score
##model:ηij = b0i + β1 sexij + β2 ageij + β3 m.gcseij + β4-10 boardi + β11-20 Itypei,
#Pr{Yij=0} = F(-ηij);
#Pr{Yij=2} = F(c-ηij) - F(-ηij);
#Pr{Yij=4} = F(c+d1-ηij) - F(c-ηij);
#Pr{Yij=6} = F(c+d1+d2-ηij) - F(c+d1-ηij);
#Pr{Yij=8} = F(c+d1+d2+d3-ηij) - F(c+d1+d2-ηij);
#Pr{Yij=10} = 1 - F(c+d1+d2+d3-ηij),
#where i = 1, ..., 2,317; j = 1, ..., 33,276. The function F stands for the logistic cumulative probability distribution function.
dat = read.table("http://140.116.183.121/~sheu/lmm/Data/geometryAL.txt", h = T)
f = c(2,3,6,7)
dat[,f] = lapply(dat[,f], as.factor)
str(dat)
## 'data.frame': 33276 obs. of 7 variables:
## $ score : int 8 8 8 10 8 10 10 8 6 4 ...
## $ board : Factor w/ 7 levels "1","2","3","5",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ gender: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 1 ...
## $ age : int 1 -6 5 -1 -5 3 3 4 -2 1 ...
## $ mgcse : num 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.657 ...
## $ itype : Factor w/ 11 levels "1","2","3","4",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ iid : Factor w/ 2317 levels "1001","1002",..: 1 1 1 1 1 1 1 1 1 2 ...
p = c("tidyverse", "magrittr", "lme4","vcrpart")
lapply(p, library, character.only = TRUE)
## [[1]]
## [1] "gee" "glmmTMB" "vcrpart" "partykit" "grid"
## [6] "parallel" "ordinal" "lme4" "Matrix" "magrittr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "gee" "glmmTMB" "vcrpart" "partykit" "grid"
## [6] "parallel" "ordinal" "lme4" "Matrix" "magrittr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "gee" "glmmTMB" "vcrpart" "partykit" "grid"
## [6] "parallel" "ordinal" "lme4" "Matrix" "magrittr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "gee" "glmmTMB" "vcrpart" "partykit" "grid"
## [6] "parallel" "ordinal" "lme4" "Matrix" "magrittr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
#Plot
ggplot(dat, aes(x = score)) +
geom_histogram(aes(y =..density..),binwidth=1)+
facet_grid(itype~board)+
labs(x = "Geometry score", y = "Density")

# recode factor levels
levels(dat$gender) <- c("Male", "Female")
#plot
ggplot(dat, aes(x = score, fill=gender)) +
geom_histogram(position = "dodge", binwidth=1)+ #設置條形圖類型為簇狀,若沒設定則為堆積圖
labs(x = "Geometry score", y = "Count")

#plot
ggplot(dat, aes(x =mgcse, y=score)) +
geom_point(size=.7) +
stat_smooth(aes(group = 1), method = "glm", se = T) +
facet_grid(board~gender)+
labs(x = "GCSE score, mean-centered ",y ="Geometry score")

dat$score<-as.factor(dat$score)
summary(mt <- olmm(score~gender+age+mgcse+re(1|iid),
data = dat, family = cumulative(link = "logit")))
## Linear Mixed Model fit by Marginal Maximum
## Likelihood with Gauss-Hermite Quadrature
##
## Family: cumulative logit
## Formula: score ~ gender + age + mgcse + re(1 | iid)
## Data: dat
##
## Goodness of fit:
## AIC BIC logLik
## 110043.6 110119.3 -55012.78
##
## Random effects:
## Subject: iid
## Variance StdDev
## (Intercept) 0.2847814 0.5336492
## Number of obs: 33276, subjects: 2317
##
## Global fixed effects:
## Estimate Std. Error z value
## genderFemale -0.2788466 0.0194297 -14.3515
## age 0.0021999 0.0028603 0.7691
## mgcse -1.6664873 0.0339073 -49.1483
##
## Category-specific fixed effects:
## Estimate Std. Error z value
## 0|2:(Intercept) -2.357155 0.022811 -103.334
## 2|4:(Intercept) -1.243897 0.020793 -59.824
## 4|6:(Intercept) -0.219066 0.020336 -10.772
## 6|8:(Intercept) 0.908221 0.021368 42.504
## 8|10:(Intercept) 2.390568 0.022736 105.142
Q5 stop and frisk
dat = read.table("http://140.116.183.121/~sheu/lmm/Data/stopFrisk.txt", h = T)
str(dat)
## 'data.frame': 900 obs. of 6 variables:
## $ stops : int 75 36 74 17 37 39 23 3 26 32 ...
## $ pop : int 1720 1720 1720 1720 1368 1368 1368 1368 23854 23854 ...
## $ arrest : int 191 57 599 133 62 27 149 57 135 16 ...
## $ precinct: int 1 1 1 1 1 1 1 1 1 1 ...
## $ race : Factor w/ 3 levels "B","H","W": 1 1 1 1 2 2 2 2 3 3 ...
## $ crime : Factor w/ 4 levels "D","P","V","W": 3 4 2 1 3 4 2 1 3 4 ...
#plot
ggplot(dat, aes(x = stops)) +
geom_histogram(fill ="lightblue", colour = "black")+
labs(x = "Number of stops and frisk", y = "Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Plot
ggplot(dat, aes(x = stops)) +
geom_histogram(aes(y =..density..), fill ="lightblue", colour = "black")+
geom_density() +
facet_grid(race~crime)+
ylim(0,.005)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 9 rows containing missing values (geom_bar).

labs(x = "Number of stops and frisk", y = "Density")
## $x
## [1] "Number of stops and frisk"
##
## $y
## [1] "Density"
##
## attr(,"class")
## [1] "labels"
##Model: Yi = Poisson(uiθi),
#where Yi is the number of police stops at precinct i with rate θi relative to number of arrests ui in the previous year (an exposure).
#log(uiθi) = α + β1Raceb + β2Raceh + β3Crimed + β4Crimew + β5Crimep + β6Crimev + Σi=2 β6+i-1Precincti, i = 2, 3, ..., 75.
#Log(ui) = log((15/12)(# of arrests in 1997)i) is the offset term (scaled to a 15 months period)
summary(m0 <- glmer(stops ~ race+crime+(1|precinct), data = dat))
## Warning in glmer(stops ~ race + crime + (1 | precinct), data = dat):
## calling glmer() with family=gaussian (identity link) as a shortcut to
## lmer() is deprecated; please call lmer() directly
## Linear mixed model fit by REML ['lmerMod']
## Formula: stops ~ race + crime + (1 | precinct)
## Data: dat
##
## REML criterion at convergence: 11965.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6784 -0.5291 -0.1594 0.2504 7.2914
##
## Random effects:
## Groups Name Variance Std.Dev.
## precinct (Intercept) 2718 52.13
## Residual 34734 186.37
## Number of obs: 900, groups: precinct, 75
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 153.42 16.36 9.375
## raceH -84.00 15.22 -5.520
## raceW -176.16 15.22 -11.577
## crimeP 51.28 17.57 2.918
## crimeV 75.87 17.57 4.318
## crimeW 190.13 17.57 10.821
##
## Correlation of Fixed Effects:
## (Intr) raceH raceW crimeP crimeV
## raceH -0.465
## raceW -0.465 0.500
## crimeP -0.537 0.000 0.000
## crimeV -0.537 0.000 0.000 0.500
## crimeW -0.537 0.000 0.000 0.500 0.500
summary(m <- glmer(stops ~ race+crime+(1|precinct), data = dat, family = poisson))
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: stops ~ race + crime + (1 | precinct)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 90378.1 90411.7 -45182.0 90364.1 893
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -19.957 -6.305 -2.187 4.215 73.227
##
## Random effects:
## Groups Name Variance Std.Dev.
## precinct (Intercept) 0.3368 0.5803
## Number of obs: 900, groups: precinct, 75
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.518129 0.067580 66.86 <2e-16 ***
## raceH -0.447715 0.006060 -73.88 <2e-16 ***
## raceW -1.414281 0.008557 -165.29 <2e-16 ***
## crimeP 0.570296 0.010211 55.85 <2e-16 ***
## crimeV 0.759587 0.009888 76.82 <2e-16 ***
## crimeW 1.348198 0.009160 147.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) raceH raceW crimeP crimeV
## raceH -0.035
## raceW -0.025 0.276
## crimeP -0.097 0.000 0.000
## crimeV -0.100 0.000 0.000 0.660
## crimeW -0.108 0.000 0.000 0.712 0.735
anova(m, m0)
## refitting model(s) with ML (instead of REML)
## Data: dat
## Models:
## m: stops ~ race + crime + (1 | precinct)
## m0: stops ~ race + crime + (1 | precinct)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m 7 90378 90412 -45182 90364
## m0 8 12024 12062 -6004 12008 78356 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1