Rにおけるマッチング
rm(list=ls())
library(pacman)
p_load(tidyverse, causalweight)
smoking = read_csv("https://github.com/LOST-STATS/lost-stats.github.io/raw/source/Model_Estimation/Matching/Data/smoking.csv")
smoking = smoking %>% mutate(smoke = 1*(smoke == "Yes"))
Step One: ロジスティック回帰を実行する。
ps_model = glm(smoke ~ gender+age+marital_status+ethnicity+region, data=smoking)
library(modelsummary)
modelsummary(ps_model)
tinytable_456p3cx09yl74wsaak0h
|
(1) |
(Intercept) |
0.546 |
|
(0.083) |
genderMale |
0.021 |
|
(0.021) |
age |
-0.005 |
|
(0.001) |
marital_statusMarried |
-0.181 |
|
(0.036) |
marital_statusSeparated |
-0.065 |
|
(0.061) |
marital_statusSingle |
-0.077 |
|
(0.041) |
marital_statusWidowed |
-0.065 |
|
(0.046) |
ethnicityBlack |
0.006 |
|
(0.099) |
ethnicityChinese |
-0.014 |
|
(0.104) |
ethnicityMixed |
0.137 |
|
(0.130) |
ethnicityRefused |
0.135 |
|
(0.134) |
ethnicityUnknown |
0.328 |
|
(0.304) |
ethnicityWhite |
0.075 |
|
(0.067) |
regionMidlands & East Anglia |
-0.040 |
|
(0.038) |
regionScotland |
0.069 |
|
(0.048) |
regionSouth East |
0.001 |
|
(0.042) |
regionSouth West |
0.014 |
|
(0.047) |
regionThe North |
-0.026 |
|
(0.039) |
regionWales |
-0.024 |
|
(0.057) |
Num.Obs. |
1691 |
R2 |
0.079 |
AIC |
1863.8 |
BIC |
1972.5 |
Log.Lik. |
-911.923 |
F |
7.995 |
RMSE |
0.41 |
Step Two: 傾向スコアによるマッチング。
- この状況には当てはまらないが、選択する共変量に欠損値がないことを確認する必要がある。
- マッチさせるには、matchitコマンドを使用し、関数に数式、使用するデータ、方法(この場合は最近傍推定)を渡す。
library(MatchIt)
match = matchit(smoke ~ gender+age+marital_status+ethnicity+region, method = "nearest", data =smoking)
match
## A matchit object
## - method: 1:1 nearest neighbor matching without replacement
## - distance: Propensity score
## - estimated with logistic regression
## - number of obs.: 1691 (original), 842 (matched)
## - target estimand: ATT
## - covariates: gender, age, marital_status, ethnicity, region
Step Three: バランスを確認する。
summary(match)
##
## Call:
## matchit(formula = smoke ~ gender + age + marital_status + ethnicity +
## region, data = smoking, method = "nearest")
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff.
## distance 0.3083 0.2293 0.6482
## genderFemale 0.5558 0.5756 -0.0398
## genderMale 0.4442 0.4244 0.0398
## age 42.7150 52.1969 -0.5860
## marital_statusDivorced 0.1378 0.0811 0.1644
## marital_statusMarried 0.3397 0.5268 -0.3951
## marital_statusSeparated 0.0523 0.0362 0.0721
## marital_statusSingle 0.3753 0.2118 0.3376
## marital_statusWidowed 0.0950 0.1441 -0.1674
## ethnicityAsian 0.0190 0.0260 -0.0511
## ethnicityBlack 0.0190 0.0205 -0.0108
## ethnicityChinese 0.0119 0.0173 -0.0503
## ethnicityMixed 0.0119 0.0071 0.0442
## ethnicityRefused 0.0095 0.0071 0.0249
## ethnicityUnknown 0.0024 0.0008 0.0326
## ethnicityWhite 0.9264 0.9213 0.0195
## regionLondon 0.1188 0.1039 0.0458
## regionMidlands & East Anglia 0.2185 0.2764 -0.1400
## regionScotland 0.1211 0.0764 0.1372
## regionSouth East 0.1544 0.1472 0.0198
## regionSouth West 0.0998 0.0906 0.0307
## regionThe North 0.2399 0.2559 -0.0375
## regionWales 0.0475 0.0496 -0.0099
## Var. Ratio eCDF Mean eCDF Max
## distance 1.0780 0.1869 0.2890
## genderFemale . 0.0198 0.0198
## genderMale . 0.0198 0.0198
## age 0.7302 0.1200 0.2379
## marital_statusDivorced . 0.0567 0.0567
## marital_statusMarried . 0.1871 0.1871
## marital_statusSeparated . 0.0160 0.0160
## marital_statusSingle . 0.1635 0.1635
## marital_statusWidowed . 0.0491 0.0491
## ethnicityAsian . 0.0070 0.0070
## ethnicityBlack . 0.0015 0.0015
## ethnicityChinese . 0.0054 0.0054
## ethnicityMixed . 0.0048 0.0048
## ethnicityRefused . 0.0024 0.0024
## ethnicityUnknown . 0.0016 0.0016
## ethnicityWhite . 0.0051 0.0051
## regionLondon . 0.0148 0.0148
## regionMidlands & East Anglia . 0.0579 0.0579
## regionScotland . 0.0448 0.0448
## regionSouth East . 0.0072 0.0072
## regionSouth West . 0.0092 0.0092
## regionThe North . 0.0160 0.0160
## regionWales . 0.0021 0.0021
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff.
## distance 0.3083 0.3074 0.0079
## genderFemale 0.5558 0.5511 0.0096
## genderMale 0.4442 0.4489 -0.0096
## age 42.7150 42.0641 0.0402
## marital_statusDivorced 0.1378 0.1306 0.0207
## marital_statusMarried 0.3397 0.3729 -0.0702
## marital_statusSeparated 0.0523 0.0546 -0.0107
## marital_statusSingle 0.3753 0.3682 0.0147
## marital_statusWidowed 0.0950 0.0736 0.0729
## ethnicityAsian 0.0190 0.0190 0.0000
## ethnicityBlack 0.0190 0.0166 0.0174
## ethnicityChinese 0.0119 0.0048 0.0658
## ethnicityMixed 0.0119 0.0119 0.0000
## ethnicityRefused 0.0095 0.0071 0.0245
## ethnicityUnknown 0.0024 0.0024 0.0000
## ethnicityWhite 0.9264 0.9382 -0.0455
## regionLondon 0.1188 0.1116 0.0220
## regionMidlands & East Anglia 0.2185 0.2185 0.0000
## regionScotland 0.1211 0.1235 -0.0073
## regionSouth East 0.1544 0.1496 0.0131
## regionSouth West 0.0998 0.0855 0.0476
## regionThe North 0.2399 0.2613 -0.0501
## regionWales 0.0475 0.0499 -0.0112
## Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
## distance 1.0142 0.0019 0.0261 0.0098
## genderFemale . 0.0048 0.0048 0.8509
## genderMale . 0.0048 0.0048 0.8509
## age 0.9215 0.0127 0.0404 0.6383
## marital_statusDivorced . 0.0071 0.0071 0.6272
## marital_statusMarried . 0.0333 0.0333 0.5417
## marital_statusSeparated . 0.0024 0.0024 0.3949
## marital_statusSingle . 0.0071 0.0071 0.6623
## marital_statusWidowed . 0.0214 0.0214 0.4293
## ethnicityAsian . 0.0000 0.0000 0.0380
## ethnicityBlack . 0.0024 0.0024 0.2262
## ethnicityChinese . 0.0071 0.0071 0.1535
## ethnicityMixed . 0.0000 0.0000 0.0238
## ethnicityRefused . 0.0024 0.0024 0.1714
## ethnicityUnknown . 0.0000 0.0000 0.0048
## ethnicityWhite . 0.0119 0.0119 0.4820
## regionLondon . 0.0071 0.0071 0.5654
## regionMidlands & East Anglia . 0.0000 0.0000 0.2185
## regionScotland . 0.0024 0.0024 0.4732
## regionSouth East . 0.0048 0.0048 0.6442
## regionSouth West . 0.0143 0.0143 0.4280
## regionThe North . 0.0214 0.0214 0.5507
## regionWales . 0.0024 0.0024 0.4132
##
## Sample Sizes:
## Control Treated
## All 1270 421
## Matched 421 421
## Unmatched 849 0
## Discarded 0 0
##Create a data frame from matches using the match.data function.
match_data = match.data(match)
#Check the dimensions.
dim(match_data)
## [1] 842 15
Step Four: 新しいサンプルを使って分析を行う。
- 回帰で使用することができるように、婚姻状態を因子変数に変える。
match_data = match_data %>%
mutate(marital_status = as.factor(marital_status))
lm_nocontrols = lm(marital_status ~ smoke, data= match_data)
modelsummary(lm_nocontrols)
tinytable_gz71j46rah98po4fp2v8
|
(1) |
(Intercept) |
2.881 |
smoke |
0.069 |
Num.Obs. |
842 |
コントロールあり、標準誤差もここでは誤り
lm_controls =lm(marital_status ~ smoke+age+gender+ethnicity+marital_status, data=match_data)
modelsummary(lm_controls)
tinytable_vkkgkowjijevchla1qpu
|
(1) |
(Intercept) |
3.051 |
smoke |
0.074 |
age |
-0.008 |
genderMale |
0.035 |
ethnicityBlack |
0.821 |
ethnicityChinese |
-0.252 |
ethnicityMixed |
-0.008 |
ethnicityRefused |
0.830 |
ethnicityUnknown |
-1.157 |
ethnicityWhite |
0.141 |
Num.Obs. |
842 |
比較
models <- list(lm_nocontrols, lm_controls)
modelplot(models)
