[Multilevel models] Class Exercise

2022-09-24

Wide to long format

## 載入需要的套件:lme4
## 載入需要的套件:Matrix
## 
## 載入套件:'tidyr'
## 下列物件被遮斷自 'package:Matrix':
## 
##     expand, pack, unpack
data(Gcsemv, package="mlmRev")
names(Gcsemv) <- c("School", "Student", "Gender", "Written", "Course")
dta <- Gcsemv |> pivot_longer(cols=4:5, names_to="Test", values_to="Score")
ggplot(na.omit(dta), 
       aes(x=Test, y=Score, 
           color=Gender))+
  stat_summary(fun.data="mean_cl_boot", 
               geom='pointrange')+
  stat_summary(aes(group=Gender), 
               fun=mean, geom="line")+
  labs(x="Test", 
       y="Score")+
  theme_minimal()+
  theme(legend.position=c(.9,.8))

Weight

pacman::p_load(survey)
library(dplyr)
## 
## 載入套件:'dplyr'
## 下列物件被遮斷自 'package:stats':
## 
##     filter, lag
## 下列物件被遮斷自 'package:base':
## 
##     intersect, setdiff, setequal, union
data(api, package="survey")
dta <- apipop[,c("cds", "stype", "cname", "api00", "meals", "grad.sch")]
str(dta)
## 'data.frame':    6194 obs. of  6 variables:
##  $ cds     : chr  "01611190130229" "01611190132878" "01611196000004" "01611196090005" ...
##  $ stype   : Factor w/ 3 levels "E","H","M": 2 2 3 1 1 1 1 1 3 1 ...
##  $ cname   : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
##  $ api00   : int  731 622 622 774 811 780 808 739 795 650 ...
##  $ meals   : int  14 20 55 35 15 25 22 50 10 71 ...
##  $ grad.sch: int  18 9 8 15 33 13 12 6 28 3 ...

API (2000) and percent elligible for meals

ggplot(dta, 
       aes(meals, api00, group=cname)) +
  geom_point(alpha=.2,
             col='gray')+
  stat_smooth(method='lm', 
              formula=y~x, 
              se=F, 
              size=rel(.2), 
              col='black')+
  labs(x="Percent elligible for meals",
       y="API in year 2000")+
  theme_minimal()

#Simple random sampling
N <- dim(dta)[1]
n <- 300
set.seed(1999)
dta_srs <- dta %>% sample_n(n) %>% mutate(w = N/n()) 
knitr::kable(head(dta_srs))
cds stype cname api00 meals grad.sch w
1188 19642956057483 E Los Angeles 612 73 4 20.64667
1885 19647336018907 E Los Angeles 605 68 3 20.64667
2098 19647666020192 E Los Angeles 796 24 17 20.64667
690 10621666088942 E Fresno 424 100 0 20.64667
1430 19646676108419 E Los Angeles 642 52 5 20.64667
3552 33736766031728 E Riverside 473 77 1 20.64667
#Type of Schools across different counties
with(dta, table(cname, stype)) |> head(9) |> knitr::kable()
E H M
Alameda 196 31 52
Amador 6 2 2
Butte 30 9 9
Calaveras 7 1 2
Colusa 4 3 2
Contra Costa 120 26 33
Del Norte 6 1 1
El Dorado 26 5 9
Fresno 127 25 34
#Strata sampling by school type
dta_sch <- dta %>%
  group_by(stype) %>%
  filter(stype == "E") %>%
  sample_n(100) %>%
  bind_rows(., dta %>% filter(stype != "E") %>% 
              group_by(stype) %>% sample_n(100)) %>% 
  arrange(cds) %>% 
  ungroup() 
knitr::kable(head(dta_sch))
cds stype cname api00 meals grad.sch
01611436056857 M Alameda 762 28 39
01611436090211 E Alameda 767 45 31
01611766056873 M Alameda 773 16 19
01612340130054 H Alameda 640 19 6
01612426056980 M Alameda 617 31 7
01612590136051 H Alameda 500 38 7
#Strata sampling weights
dta_sch <- dta %>% 
  group_by(stype) %>%
  dplyr::summarize(fpc = n()) %>%
  left_join(dta_sch, .) %>%
  group_by(stype) %>%
  mutate(w = fpc/n()) 
## Joining, by = "stype"
knitr::kable(head(dta_sch))
cds stype cname api00 meals grad.sch fpc w
01611436056857 M Alameda 762 28 39 1018 10.18
01611436090211 E Alameda 767 45 31 4421 44.21
01611766056873 M Alameda 773 16 19 1018 10.18
01612340130054 H Alameda 640 19 6 755 7.55
01612426056980 M Alameda 617 31 7 1018 10.18
01612590136051 H Alameda 500 38 7 755 7.55
#Mixed-effects model with weights
m0 <- lme4::lmer(api00 ~ meals + (1 | cname), data=dta_srs) 
m0_srs <- lme4::lmer(api00 ~ meals + (1 | cname), data=dta_srs, weights = w) 
m0_sch <- lme4::lmer(api00 ~ meals + (1 | cname), data=dta_sch, weights = w)
#Comparing results
modelsummary::msummary(list("SRS" = m0_srs, "Strata" = m0_sch, "NW"=m0))
SRS Strata NW
(Intercept) 829.143 831.395 829.143
(9.395) (9.433) (9.395)
meals −3.439 −3.380 −3.439
(0.146) (0.136) (0.146)
SD (Intercept cname) 21.542 27.555 21.542
SD (Observations) 327.599 319.933 72.097
Num.Obs. 300 300 300
R2 Marg. 0.096 0.091 0.668
R2 Cond. 0.099 0.097 0.695
AIC 3436.7 3525.4 3436.7
BIC 3451.5 3540.2 3451.5
ICC 0.0 0.0 0.1
RMSE 70.54 79.30 70.54

END