Q1 problematic drinking in college students

## problematic drinking in college students. Alcohol-related problems
 #were recorded across 2 years (5 time points) for a sample of 881 students.

 #log(E[RAPIit]) = b0i + β1 genderi + β2 monthit + β3 genderi × monthit + b2i monthit
 #E[RAPIit] is the expected number of alcohol-related problems measured by RAPI for a student i at time t given gender
 #Poisson distribution
p = c("tidyverse", "magrittr", "lme4", "ordinal", "vcrpart","glmmTMB")
lapply(p, library, character.only = TRUE)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## Attaching package: 'ordinal'
## The following objects are masked from 'package:lme4':
## 
##     ranef, VarCorr
## The following object is masked from 'package:dplyr':
## 
##     slice
## Loading required package: parallel
## Loading required package: partykit
## Loading required package: grid
## 
## Attaching package: 'vcrpart'
## The following objects are masked from 'package:ordinal':
## 
##     ranef, VarCorr
## The following object is masked from 'package:magrittr':
## 
##     extract
## The following object is masked from 'package:tidyr':
## 
##     extract
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.12
## Current Matrix version is 1.2.11
## Please re-install 'TMB' from source or restore original 'Matrix' package
## 
## Attaching package: 'glmmTMB'
## The following objects are masked from 'package:ordinal':
## 
##     ranef, VarCorr
## [[1]]
##  [1] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"   
##  [6] "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices"
## [11] "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "magrittr"  "dplyr"     "purrr"     "readr"     "tidyr"    
##  [6] "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [11] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "lme4"      "Matrix"    "magrittr"  "dplyr"     "purrr"    
##  [6] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [11] "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [16] "methods"   "base"     
## 
## [[4]]
##  [1] "ordinal"   "lme4"      "Matrix"    "magrittr"  "dplyr"    
##  [6] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [11] "tidyverse" "stats"     "graphics"  "grDevices" "utils"    
## [16] "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "vcrpart"   "partykit"  "grid"      "parallel"  "ordinal"  
##  [6] "lme4"      "Matrix"    "magrittr"  "dplyr"     "purrr"    
## [11] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [16] "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [21] "methods"   "base"     
## 
## [[6]]
##  [1] "glmmTMB"   "vcrpart"   "partykit"  "grid"      "parallel" 
##  [6] "ordinal"   "lme4"      "Matrix"    "magrittr"  "dplyr"    
## [11] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [16] "tidyverse" "stats"     "graphics"  "grDevices" "utils"    
## [21] "datasets"  "methods"   "base"
dat= read.table("http://140.116.183.121/~sheu/lmm/Data/rapi.csv",sep = ',',  h = T)
str(dat)
## 'data.frame':    3616 obs. of  5 variables:
##  $ ID    : Factor w/ 818 levels "S1001","S1002",..: 1 1 1 2 2 2 2 2 3 3 ...
##  $ Obs_ID: int  1 2 3 1 2 3 4 5 1 2 ...
##  $ Gender: Factor w/ 2 levels "Men","Women": 1 1 1 2 2 2 2 2 1 1 ...
##  $ Month : int  0 6 18 0 6 12 18 24 0 12 ...
##  $ RAPI  : int  0 0 0 3 6 5 4 5 9 1 ...
# set plot theme
ot <- theme_set(theme_bw())
#Plot
ggplot(dat, aes(x = RAPI)) +
  geom_histogram(aes(y =..density..), binwidth = .5)+
  facet_grid(Gender~Month)+
  labs(x = "RAPI", y = "Density")

#Plot
ggplot(dat, aes(x =Month, y=RAPI, group = ID)) +
  geom_point(alpha = .3) +
  geom_line(size= .1 , alpha = .2) +
  stat_smooth(aes(group = 1), method =  "glm", se = F) +
  facet_grid(~Gender)+
  labs(x = "Month", y ="RAPI")

#每個人有所差異,男女之間也是

#plot  若使用poisson擬合,則每月平均的rapi分數與迴歸線會差多少
ggplot(dat, aes(x =Month, y=RAPI, color = Gender)) +
  stat_summary(fun.data = "mean_se") +
  stat_smooth(method = "glm", method.args = list(family = poisson)) +
  labs(x = "Month", y ="RAPI") +
  theme()

dat$YearC= dat$Month / 12

#Find a best-fitting zero-inflated Poisson model
summary(m0 <- glmmTMB(RAPI ~ Gender*YearC + (1 | ID), 
                      data = dat, family = poisson))
##  Family: poisson  ( log )
## Formula:          RAPI ~ Gender * YearC + (1 | ID)
## Data: dat
## 
##      AIC      BIC   logLik deviance df.resid 
##    24094    24125   -12042    24084     3611 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 1.04     1.02    
## Number of obs: 3616, groups:  ID, 818
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.56982    0.05822  26.965  < 2e-16 ***
## GenderWomen       -0.22577    0.07696  -2.934  0.00335 ** 
## YearC             -0.01429    0.01346  -1.062  0.28838    
## GenderWomen:YearC -0.12145    0.01925  -6.311 2.78e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m1 <- glmmTMB(RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID), 
                      data = dat, family = poisson))
##  Family: poisson  ( log )
## Formula:          RAPI ~ Gender * YearC + (YearC | ID) + (1 | ID:Obs_ID)
## Data: dat
## 
##      AIC      BIC   logLik deviance df.resid 
##  19447.9  19497.4  -9715.9  19431.9     3608 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance Std.Dev. Corr
##  ID:Obs_ID (Intercept) 0.3765   0.6136       
##  ID        (Intercept) 0.6777   0.8232       
##            YearC       0.3365   0.5801   0.11
## Number of obs: 3616, groups:  ID:Obs_ID, 3616; ID, 818
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.58714    0.05688  27.904  < 2e-16 ***
## GenderWomen       -0.20070    0.07445  -2.696  0.00702 ** 
## YearC             -0.26393    0.04705  -5.609 2.03e-08 ***
## GenderWomen:YearC -0.19214    0.06120  -3.139  0.00169 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m2 <- glmmTMB(RAPI ~ Gender * YearC + (YearC - 1 | ID) + (1 | ID) + (1 | ID:Obs_ID), 
                      data = dat, family = poisson))
##  Family: poisson  ( log )
## Formula:          
## RAPI ~ Gender * YearC + (YearC - 1 | ID) + (1 | ID) + (1 | ID:Obs_ID)
## Data: dat
## 
##      AIC      BIC   logLik deviance df.resid 
##  19448.1  19491.5  -9717.1  19434.1     3609 
## 
## Random effects:
## 
## Conditional model:
##  Groups    Name        Variance Std.Dev.
##  ID.Obs_ID (Intercept) 0.3688   0.6073  
##  ID        (Intercept) 0.7215   0.8494  
##  ID.1      YearC       0.3586   0.5988  
## Number of obs: 3616, groups:  ID:Obs_ID, 3616; ID, 818
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.58187    0.05783  27.355  < 2e-16 ***
## GenderWomen       -0.19980    0.07583  -2.635  0.00842 ** 
## YearC             -0.25638    0.04741  -5.407  6.4e-08 ***
## GenderWomen:YearC -0.19333    0.06203  -3.117  0.00183 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bbmle::AICtab(m0, m1, m2, base=T, logLik = T)
##    logLik   AIC      dLogLik  dAIC     df
## m1  -9715.9  19447.9   2326.1      0.0 8 
## m2  -9717.1  19448.1   2325.0      0.2 7 
## m0 -12042.0  24094.0      0.0   4646.1 5
summary(m3 <- glmmTMB(RAPI ~ Gender * YearC + (YearC | ID), 
                      zi = ~ YearC, 
                      data = dat, family = nbinom2))
##  Family: nbinom2  ( log )
## Formula:          RAPI ~ Gender * YearC + (YearC | ID)
## Zero inflation:        ~YearC
## Data: dat
## 
##      AIC      BIC   logLik deviance df.resid 
##  19293.7  19355.7  -9636.9  19273.7     3606 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev. Corr
##  ID     (Intercept) 0.6598   0.8123       
##         YearC       0.2983   0.5462   0.15
## Number of obs: 3616, groups:  ID, 818
## 
## Overdispersion parameter for nbinom2 family (): 3.88 
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.78328    0.05653  31.546  < 2e-16 ***
## GenderWomen       -0.19028    0.07208  -2.640 0.008291 ** 
## YearC             -0.22035    0.04693  -4.695 2.67e-06 ***
## GenderWomen:YearC -0.20228    0.05840  -3.464 0.000532 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Zero-inflation model:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -3.3636     0.2638 -12.752   <2e-16 ***
## YearC         0.4283     0.1936   2.212    0.027 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Q2 adolescent amoking example

p = c("tidyverse", "magrittr", "lme4", "ordinal", "vcrpart")
lapply(p, library, character.only = TRUE)
## [[1]]
##  [1] "glmmTMB"   "vcrpart"   "partykit"  "grid"      "parallel" 
##  [6] "ordinal"   "lme4"      "Matrix"    "magrittr"  "dplyr"    
## [11] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [16] "tidyverse" "stats"     "graphics"  "grDevices" "utils"    
## [21] "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "glmmTMB"   "vcrpart"   "partykit"  "grid"      "parallel" 
##  [6] "ordinal"   "lme4"      "Matrix"    "magrittr"  "dplyr"    
## [11] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [16] "tidyverse" "stats"     "graphics"  "grDevices" "utils"    
## [21] "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "glmmTMB"   "vcrpart"   "partykit"  "grid"      "parallel" 
##  [6] "ordinal"   "lme4"      "Matrix"    "magrittr"  "dplyr"    
## [11] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [16] "tidyverse" "stats"     "graphics"  "grDevices" "utils"    
## [21] "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "glmmTMB"   "vcrpart"   "partykit"  "grid"      "parallel" 
##  [6] "ordinal"   "lme4"      "Matrix"    "magrittr"  "dplyr"    
## [11] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [16] "tidyverse" "stats"     "graphics"  "grDevices" "utils"    
## [21] "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "glmmTMB"   "vcrpart"   "partykit"  "grid"      "parallel" 
##  [6] "ordinal"   "lme4"      "Matrix"    "magrittr"  "dplyr"    
## [11] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [16] "tidyverse" "stats"     "graphics"  "grDevices" "utils"    
## [21] "datasets"  "methods"   "base"
##examine if the probability of smoking depends on sex, parental smoking and 
 #the wave of the study and how the probability of smoking varies from one individual adolescent to another.

dat = read.table("http://140.116.183.121/~sheu/lmm/Data/adolescentSmoking.txt", h = T)
f = c(1,2,3,4)
dat[,f] = lapply(dat[,f], as.factor)
dat$smkreg_f = factor(dat$smkreg)

#Plot
m = aggregate(smkreg~wave+sex, FUN = mean, data = dat)

ggplot(m, aes(x = wave, y = smkreg, group = sex, linetype = sex))+
  geom_point()+
  geom_line()+
  theme_bw()+
  ylim(0,.2)

#model
#smoking varies from one individual adolescent to another
summary(m<- glmer(smkreg_f~sex+parsmk+wave+(1|newid), family = "binomial", data = dat))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 4.75739 (tol =
## 0.001, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: smkreg_f ~ sex + parsmk + wave + (1 | newid)
##    Data: dat
## 
##      AIC      BIC   logLik deviance df.resid 
##   3803.4   3867.1  -1892.7   3785.4     8721 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4829 -0.0328 -0.0209 -0.0144  4.2244 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  newid  (Intercept) 40.98    6.402   
## Number of obs: 8730, groups:  newid, 1760
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -8.0360     0.3549 -22.641  < 2e-16 ***
## sex1          0.2196     0.2694   0.815 0.414994    
## parsmk1       1.1726     0.2761   4.247 2.17e-05 ***
## wave2        -0.8710     0.2454  -3.550 0.000385 ***
## wave3        -0.3562     0.2394  -1.488 0.136722    
## wave4         0.1828     0.2352   0.777 0.437006    
## wave5         0.6205     0.2339   2.653 0.007969 ** 
## wave6         1.1000     0.2329   4.722 2.34e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) sex1   prsmk1 wave2  wave3  wave4  wave5 
## sex1    -0.415                                          
## parsmk1 -0.261  0.033                                   
## wave2   -0.381 -0.009  0.011                            
## wave3   -0.422 -0.007  0.009  0.642                     
## wave4   -0.464 -0.008  0.008  0.649  0.667              
## wave5   -0.504 -0.007  0.007  0.647  0.670  0.693       
## wave6   -0.548 -0.012  0.005  0.639  0.666  0.693  0.711
## convergence code: 0
## Model failed to converge with max|grad| = 4.75739 (tol = 0.001, component 1)

Q3 couple commitment example

##Use a generalized linear mixed model to analyze the couple commitment example.

dat = read.table("http://140.116.183.121/~sheu/lmm/Data/coupleCommitment.txt", h = T)
str(dat)
## 'data.frame':    263 obs. of  10 variables:
##  $ couple : int  1 1 2 2 3 3 4 4 5 5 ...
##  $ gender : int  0 1 0 1 0 1 0 1 0 1 ...
##  $ msi    : int  4 3 7 4 4 0 8 2 2 3 ...
##  $ das    : int  70 92 45 49 79 90 92 70 87 75 ...
##  $ sex    : int  61 64 69 68 49 56 66 68 42 56 ...
##  $ afc    : int  65 69 69 69 49 57 56 67 65 59 ...
##  $ dasc   : num  -14.49 7.51 -39.49 -35.49 -5.49 ...
##  $ sexc   : num  0.932 3.932 8.932 7.932 -11.068 ...
##  $ afcc   : num  1.65 5.65 5.65 5.65 -14.35 ...
##  $ infidel: int  0 0 0 0 0 0 0 0 0 0 ...
f = c(1,2,10)
dat[,f] = lapply(dat[,f], as.factor)

# recode factor levels
levels(dat$gender) <- c("Wife", "Husband")
levels(dat$infidel) <- c("no", "yes")

#Plot
ggplot(dat, aes(x = msi)) +
  geom_histogram(binwidth=1)+
  facet_grid(infidel~gender)+
  labs(x = "Marital Status Inventory", y = "Count")

ggplot(dat, aes(x =dasc, y=msi, color = gender)) +
  geom_point() +
  stat_smooth(aes(group = gender), method =  "glm", se = F) +
  facet_grid(~infidel)+
  labs(x = " Dyadic Adjustment Scale - centered", y ="Marital Status Inventory")

ggplot(dat, aes(x =dasc, y=msi, color = gender)) +
  geom_point() +
  stat_smooth(aes(group = gender), method =  "glm", se = T) +
  facet_grid(~infidel)+
  labs(x = "Affective communication score - centered",y ="Marital Status Inventory")

ggplot(dat, aes(x =dasc, y=msi, color = gender)) +
  geom_point() +
  stat_smooth(aes(group = gender), method =  "glm", se = T) +
  facet_grid(~infidel)+
  labs(x = "Sexsual dissatisfaction score- centered",y ="Marital Status Inventory")

##Log(μ(msi)ij) = α + β1 dasij + β2 infidelij + β3 genderj + β4 afcij + β5 sexdysij + β6 dasij×infidelij,
 #where i is the couple idex, i = 1, 2, ..., 133; and j is the spouse index, j = 1,2; 
 #and the marital status inventory, msiij, is assumed to follow a Poisson distribution.

require(gee)
## Loading required package: gee
summary(m<-gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel, 
    id = couple, data = dat, family = poisson, corstr = "exchangeable"))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##     (Intercept)            dasc      infidelyes   genderHusband 
##     1.127447584    -0.017746288     0.525803868    -0.211613976 
##            afcc            sexc dasc:infidelyes 
##     0.020641206     0.006026088     0.020016627
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logarithm 
##  Variance to Mean Relation: Poisson 
##  Correlation Structure:     Exchangeable 
## 
## Call:
## gee(formula = msi ~ dasc + infidel + gender + afcc + sexc + dasc:infidel, 
##     id = couple, data = dat, family = poisson, corstr = "exchangeable")
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -6.0738805 -1.7362610 -0.2607464  1.5689169  6.0691957 
## 
## 
## Coefficients:
##                     Estimate  Naive S.E.   Naive z Robust S.E.  Robust z
## (Intercept)      1.122761266 0.073815193 15.210436 0.072177241 15.555614
## dasc            -0.017848494 0.004029852 -4.429069 0.003410110 -5.233993
## infidelyes       0.505261009 0.144164750  3.504747 0.119537471  4.226800
## genderHusband   -0.203380939 0.082501523 -2.465178 0.075600774 -2.690197
## afcc             0.016503411 0.008203429  2.011770 0.007672575  2.150961
## sexc             0.008513665 0.005466519  1.557420 0.005214695  1.632629
## dasc:infidelyes  0.016748315 0.007282451  2.299818 0.006527212  2.565922
## 
## Estimated Scale Parameter:  1.958107
## Number of Iterations:  3
## 
## Working Correlation
##           [,1]      [,2]
## [1,] 1.0000000 0.3356346
## [2,] 0.3356346 1.0000000

Q4 Geometry score

##model:ηij = b0i + β1 sexij + β2 ageij + β3 m.gcseij + β4-10 boardi + β11-20 Itypei,
 #Pr{Yij=0} = F(-ηij); 
 #Pr{Yij=2} = F(c-ηij) - F(-ηij); 
 #Pr{Yij=4} = F(c+d1-ηij) - F(c-ηij); 
 #Pr{Yij=6} = F(c+d1+d2-ηij) - F(c+d1-ηij); 
 #Pr{Yij=8} = F(c+d1+d2+d3-ηij) - F(c+d1+d2-ηij); 
 #Pr{Yij=10} = 1 - F(c+d1+d2+d3-ηij),
 #where i = 1, ..., 2,317; j = 1, ..., 33,276. The function F stands for the logistic cumulative probability distribution function. 

dat = read.table("http://140.116.183.121/~sheu/lmm/Data/geometryAL.txt", h = T)
f = c(2,3,6,7)
dat[,f] = lapply(dat[,f], as.factor)
str(dat)
## 'data.frame':    33276 obs. of  7 variables:
##  $ score : int  8 8 8 10 8 10 10 8 6 4 ...
##  $ board : Factor w/ 7 levels "1","2","3","5",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ gender: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 1 ...
##  $ age   : int  1 -6 5 -1 -5 3 3 4 -2 1 ...
##  $ mgcse : num  0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.657 ...
##  $ itype : Factor w/ 11 levels "1","2","3","4",..: 7 7 7 7 7 7 7 7 7 7 ...
##  $ iid   : Factor w/ 2317 levels "1001","1002",..: 1 1 1 1 1 1 1 1 1 2 ...
p = c("tidyverse", "magrittr", "lme4","vcrpart")
lapply(p, library, character.only = TRUE)
## [[1]]
##  [1] "gee"       "glmmTMB"   "vcrpart"   "partykit"  "grid"     
##  [6] "parallel"  "ordinal"   "lme4"      "Matrix"    "magrittr" 
## [11] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"   
## [16] "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices"
## [21] "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "gee"       "glmmTMB"   "vcrpart"   "partykit"  "grid"     
##  [6] "parallel"  "ordinal"   "lme4"      "Matrix"    "magrittr" 
## [11] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"   
## [16] "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices"
## [21] "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "gee"       "glmmTMB"   "vcrpart"   "partykit"  "grid"     
##  [6] "parallel"  "ordinal"   "lme4"      "Matrix"    "magrittr" 
## [11] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"   
## [16] "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices"
## [21] "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "gee"       "glmmTMB"   "vcrpart"   "partykit"  "grid"     
##  [6] "parallel"  "ordinal"   "lme4"      "Matrix"    "magrittr" 
## [11] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"   
## [16] "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices"
## [21] "utils"     "datasets"  "methods"   "base"
#Plot
ggplot(dat, aes(x = score)) +
  geom_histogram(aes(y =..density..),binwidth=1)+
  facet_grid(itype~board)+
  labs(x = "Geometry score", y = "Density")

# recode factor levels
levels(dat$gender) <- c("Male", "Female")
#plot
ggplot(dat, aes(x = score, fill=gender)) +
  geom_histogram(position = "dodge", binwidth=1)+      #設置條形圖類型為簇狀,若沒設定則為堆積圖
  labs(x = "Geometry score", y = "Count")

#plot
ggplot(dat, aes(x =mgcse, y=score)) +
  geom_point(size=.7) +
  stat_smooth(aes(group = 1), method =  "glm", se = T) +
  facet_grid(board~gender)+
  labs(x = "GCSE score, mean-centered ",y ="Geometry score")

dat$score<-as.factor(dat$score)
summary(mt <- olmm(score~gender+age+mgcse+re(1|iid), 
            data = dat, family = cumulative(link = "logit")))
## Linear Mixed Model fit by Marginal Maximum
## Likelihood with Gauss-Hermite Quadrature 
## 
##  Family: cumulative logit 
## Formula: score ~ gender + age + mgcse + re(1 | iid) 
##    Data: dat 
## 
## Goodness of fit:
##       AIC      BIC    logLik
##  110043.6 110119.3 -55012.78
## 
## Random effects:
## Subject: iid 
##             Variance  StdDev   
## (Intercept) 0.2847814 0.5336492
## Number of obs: 33276, subjects: 2317
## 
## Global fixed effects:
##                Estimate Std. Error  z value
## genderFemale -0.2788466  0.0194297 -14.3515
## age           0.0021999  0.0028603   0.7691
## mgcse        -1.6664873  0.0339073 -49.1483
## 
## Category-specific fixed effects:
##                   Estimate Std. Error  z value
## 0|2:(Intercept)  -2.357155   0.022811 -103.334
## 2|4:(Intercept)  -1.243897   0.020793  -59.824
## 4|6:(Intercept)  -0.219066   0.020336  -10.772
## 6|8:(Intercept)   0.908221   0.021368   42.504
## 8|10:(Intercept)  2.390568   0.022736  105.142

Q5 stop and frisk

dat = read.table("http://140.116.183.121/~sheu/lmm/Data/stopFrisk.txt", h = T)
str(dat)
## 'data.frame':    900 obs. of  6 variables:
##  $ stops   : int  75 36 74 17 37 39 23 3 26 32 ...
##  $ pop     : int  1720 1720 1720 1720 1368 1368 1368 1368 23854 23854 ...
##  $ arrest  : int  191 57 599 133 62 27 149 57 135 16 ...
##  $ precinct: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ race    : Factor w/ 3 levels "B","H","W": 1 1 1 1 2 2 2 2 3 3 ...
##  $ crime   : Factor w/ 4 levels "D","P","V","W": 3 4 2 1 3 4 2 1 3 4 ...
#plot
ggplot(dat, aes(x = stops)) +
  geom_histogram(fill ="lightblue", colour = "black")+
  labs(x = "Number of stops and frisk", y = "Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Plot
ggplot(dat, aes(x = stops)) +
  geom_histogram(aes(y =..density..), fill ="lightblue", colour = "black")+
  geom_density() +
  facet_grid(race~crime)+
  ylim(0,.005)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 9 rows containing missing values (geom_bar).

  labs(x = "Number of stops and frisk", y = "Density")
## $x
## [1] "Number of stops and frisk"
## 
## $y
## [1] "Density"
## 
## attr(,"class")
## [1] "labels"
##Model:  Yi = Poisson(uiθi),
 #where Yi is the number of police stops at precinct i with rate θi relative to number of arrests ui in the previous year (an exposure).
 #log(uiθi) = α + β1Raceb + β2Raceh + β3Crimed + β4Crimew + β5Crimep + β6Crimev + Σi=2 β6+i-1Precincti, i = 2, 3, ..., 75.
 #Log(ui) = log((15/12)(# of arrests in 1997)i) is the offset term (scaled to a 15 months period)

summary(m0 <- glmer(stops ~ race+crime+(1|precinct), data = dat))
## Warning in glmer(stops ~ race + crime + (1 | precinct), data = dat):
## calling glmer() with family=gaussian (identity link) as a shortcut to
## lmer() is deprecated; please call lmer() directly
## Linear mixed model fit by REML ['lmerMod']
## Formula: stops ~ race + crime + (1 | precinct)
##    Data: dat
## 
## REML criterion at convergence: 11965.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6784 -0.5291 -0.1594  0.2504  7.2914 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  precinct (Intercept)  2718     52.13  
##  Residual             34734    186.37  
## Number of obs: 900, groups:  precinct, 75
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   153.42      16.36   9.375
## raceH         -84.00      15.22  -5.520
## raceW        -176.16      15.22 -11.577
## crimeP         51.28      17.57   2.918
## crimeV         75.87      17.57   4.318
## crimeW        190.13      17.57  10.821
## 
## Correlation of Fixed Effects:
##        (Intr) raceH  raceW  crimeP crimeV
## raceH  -0.465                            
## raceW  -0.465  0.500                     
## crimeP -0.537  0.000  0.000              
## crimeV -0.537  0.000  0.000  0.500       
## crimeW -0.537  0.000  0.000  0.500  0.500
summary(m <- glmer(stops ~ race+crime+(1|precinct), data = dat, family = poisson))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: stops ~ race + crime + (1 | precinct)
##    Data: dat
## 
##      AIC      BIC   logLik deviance df.resid 
##  90378.1  90411.7 -45182.0  90364.1      893 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -19.957  -6.305  -2.187   4.215  73.227 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  precinct (Intercept) 0.3368   0.5803  
## Number of obs: 900, groups:  precinct, 75
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.518129   0.067580   66.86   <2e-16 ***
## raceH       -0.447715   0.006060  -73.88   <2e-16 ***
## raceW       -1.414281   0.008557 -165.29   <2e-16 ***
## crimeP       0.570296   0.010211   55.85   <2e-16 ***
## crimeV       0.759587   0.009888   76.82   <2e-16 ***
## crimeW       1.348198   0.009160  147.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##        (Intr) raceH  raceW  crimeP crimeV
## raceH  -0.035                            
## raceW  -0.025  0.276                     
## crimeP -0.097  0.000  0.000              
## crimeV -0.100  0.000  0.000  0.660       
## crimeW -0.108  0.000  0.000  0.712  0.735
anova(m, m0)
## refitting model(s) with ML (instead of REML)
## Data: dat
## Models:
## m: stops ~ race + crime + (1 | precinct)
## m0: stops ~ race + crime + (1 | precinct)
##    Df   AIC   BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## m   7 90378 90412 -45182    90364                            
## m0  8 12024 12062  -6004    12008 78356      1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1