Weight
pacman::p_load(survey)
library(dplyr)
##
## 載入套件:'dplyr'
## 下列物件被遮斷自 'package:stats':
##
## filter, lag
## 下列物件被遮斷自 'package:base':
##
## intersect, setdiff, setequal, union
data(api, package="survey")
dta <- apipop[,c("cds", "stype", "cname", "api00", "meals", "grad.sch")]
str(dta)
## 'data.frame': 6194 obs. of 6 variables:
## $ cds : chr "01611190130229" "01611190132878" "01611196000004" "01611196090005" ...
## $ stype : Factor w/ 3 levels "E","H","M": 2 2 3 1 1 1 1 1 3 1 ...
## $ cname : chr "Alameda" "Alameda" "Alameda" "Alameda" ...
## $ api00 : int 731 622 622 774 811 780 808 739 795 650 ...
## $ meals : int 14 20 55 35 15 25 22 50 10 71 ...
## $ grad.sch: int 18 9 8 15 33 13 12 6 28 3 ...
API (2000) and percent elligible for meals
ggplot(dta,
aes(meals, api00, group=cname)) +
geom_point(alpha=.2,
col='gray')+
stat_smooth(method='lm',
formula=y~x,
se=F,
size=rel(.2),
col='black')+
labs(x="Percent elligible for meals",
y="API in year 2000")+
theme_minimal()

#Simple random sampling
N <- dim(dta)[1]
n <- 300
set.seed(1999)
dta_srs <- dta %>% sample_n(n) %>% mutate(w = N/n())
knitr::kable(head(dta_srs))
| 1188 |
19642956057483 |
E |
Los Angeles |
612 |
73 |
4 |
20.64667 |
| 1885 |
19647336018907 |
E |
Los Angeles |
605 |
68 |
3 |
20.64667 |
| 2098 |
19647666020192 |
E |
Los Angeles |
796 |
24 |
17 |
20.64667 |
| 690 |
10621666088942 |
E |
Fresno |
424 |
100 |
0 |
20.64667 |
| 1430 |
19646676108419 |
E |
Los Angeles |
642 |
52 |
5 |
20.64667 |
| 3552 |
33736766031728 |
E |
Riverside |
473 |
77 |
1 |
20.64667 |
#Type of Schools across different counties
with(dta, table(cname, stype)) |> head(9) |> knitr::kable()
| Alameda |
196 |
31 |
52 |
| Amador |
6 |
2 |
2 |
| Butte |
30 |
9 |
9 |
| Calaveras |
7 |
1 |
2 |
| Colusa |
4 |
3 |
2 |
| Contra Costa |
120 |
26 |
33 |
| Del Norte |
6 |
1 |
1 |
| El Dorado |
26 |
5 |
9 |
| Fresno |
127 |
25 |
34 |
#Strata sampling by school type
dta_sch <- dta %>%
group_by(stype) %>%
filter(stype == "E") %>%
sample_n(100) %>%
bind_rows(., dta %>% filter(stype != "E") %>%
group_by(stype) %>% sample_n(100)) %>%
arrange(cds) %>%
ungroup()
knitr::kable(head(dta_sch))
| 01611436056857 |
M |
Alameda |
762 |
28 |
39 |
| 01611436090211 |
E |
Alameda |
767 |
45 |
31 |
| 01611766056873 |
M |
Alameda |
773 |
16 |
19 |
| 01612340130054 |
H |
Alameda |
640 |
19 |
6 |
| 01612426056980 |
M |
Alameda |
617 |
31 |
7 |
| 01612590136051 |
H |
Alameda |
500 |
38 |
7 |
#Strata sampling weights
dta_sch <- dta %>%
group_by(stype) %>%
dplyr::summarize(fpc = n()) %>%
left_join(dta_sch, .) %>%
group_by(stype) %>%
mutate(w = fpc/n())
## Joining, by = "stype"
knitr::kable(head(dta_sch))
| 01611436056857 |
M |
Alameda |
762 |
28 |
39 |
1018 |
10.18 |
| 01611436090211 |
E |
Alameda |
767 |
45 |
31 |
4421 |
44.21 |
| 01611766056873 |
M |
Alameda |
773 |
16 |
19 |
1018 |
10.18 |
| 01612340130054 |
H |
Alameda |
640 |
19 |
6 |
755 |
7.55 |
| 01612426056980 |
M |
Alameda |
617 |
31 |
7 |
1018 |
10.18 |
| 01612590136051 |
H |
Alameda |
500 |
38 |
7 |
755 |
7.55 |
#Mixed-effects model with weights
m0 <- lme4::lmer(api00 ~ meals + (1 | cname), data=dta_srs)
m0_srs <- lme4::lmer(api00 ~ meals + (1 | cname), data=dta_srs, weights = w)
m0_sch <- lme4::lmer(api00 ~ meals + (1 | cname), data=dta_sch, weights = w)
#Comparing results
modelsummary::msummary(list("SRS" = m0_srs, "Strata" = m0_sch, "NW"=m0))
|
|
SRS
|
Strata
|
NW
|
|
(Intercept)
|
829.143
|
831.395
|
829.143
|
|
|
(9.395)
|
(9.433)
|
(9.395)
|
|
meals
|
−3.439
|
−3.380
|
−3.439
|
|
|
(0.146)
|
(0.136)
|
(0.146)
|
|
SD (Intercept cname)
|
21.542
|
27.555
|
21.542
|
|
SD (Observations)
|
327.599
|
319.933
|
72.097
|
|
Num.Obs.
|
300
|
300
|
300
|
|
R2 Marg.
|
0.096
|
0.091
|
0.668
|
|
R2 Cond.
|
0.099
|
0.097
|
0.695
|
|
AIC
|
3436.7
|
3525.4
|
3436.7
|
|
BIC
|
3451.5
|
3540.2
|
3451.5
|
|
ICC
|
0.0
|
0.0
|
0.1
|
|
RMSE
|
70.54
|
79.30
|
70.54
|
END