Rにおけるマッチング

rm(list=ls())
library(pacman)
p_load(tidyverse, causalweight)

smoking = read_csv("https://github.com/LOST-STATS/lost-stats.github.io/raw/source/Model_Estimation/Matching/Data/smoking.csv")

smoking = smoking %>% mutate(smoke = 1*(smoke == "Yes"))

Step One: ロジスティック回帰を実行する。

ps_model = glm(smoke ~ gender+age+marital_status+ethnicity+region, data=smoking)

library(modelsummary)

modelsummary(ps_model)
tinytable_456p3cx09yl74wsaak0h
(1)
(Intercept) 0.546
(0.083)
genderMale 0.021
(0.021)
age -0.005
(0.001)
marital_statusMarried -0.181
(0.036)
marital_statusSeparated -0.065
(0.061)
marital_statusSingle -0.077
(0.041)
marital_statusWidowed -0.065
(0.046)
ethnicityBlack 0.006
(0.099)
ethnicityChinese -0.014
(0.104)
ethnicityMixed 0.137
(0.130)
ethnicityRefused 0.135
(0.134)
ethnicityUnknown 0.328
(0.304)
ethnicityWhite 0.075
(0.067)
regionMidlands & East Anglia -0.040
(0.038)
regionScotland 0.069
(0.048)
regionSouth East 0.001
(0.042)
regionSouth West 0.014
(0.047)
regionThe North -0.026
(0.039)
regionWales -0.024
(0.057)
Num.Obs. 1691
R2 0.079
AIC 1863.8
BIC 1972.5
Log.Lik. -911.923
F 7.995
RMSE 0.41

Step Two: 傾向スコアによるマッチング。

  • この状況には当てはまらないが、選択する共変量に欠損値がないことを確認する必要がある。
  • マッチさせるには、matchitコマンドを使用し、関数に数式、使用するデータ、方法(この場合は最近傍推定)を渡す。
library(MatchIt)

match = matchit(smoke ~ gender+age+marital_status+ethnicity+region, method = "nearest", data =smoking)

match
## A matchit object
##  - method: 1:1 nearest neighbor matching without replacement
##  - distance: Propensity score
##              - estimated with logistic regression
##  - number of obs.: 1691 (original), 842 (matched)
##  - target estimand: ATT
##  - covariates: gender, age, marital_status, ethnicity, region

Step Three: バランスを確認する。

summary(match)
## 
## Call:
## matchit(formula = smoke ~ gender + age + marital_status + ethnicity + 
##     region, data = smoking, method = "nearest")
## 
## Summary of Balance for All Data:
##                              Means Treated Means Control Std. Mean Diff.
## distance                            0.3083        0.2293          0.6482
## genderFemale                        0.5558        0.5756         -0.0398
## genderMale                          0.4442        0.4244          0.0398
## age                                42.7150       52.1969         -0.5860
## marital_statusDivorced              0.1378        0.0811          0.1644
## marital_statusMarried               0.3397        0.5268         -0.3951
## marital_statusSeparated             0.0523        0.0362          0.0721
## marital_statusSingle                0.3753        0.2118          0.3376
## marital_statusWidowed               0.0950        0.1441         -0.1674
## ethnicityAsian                      0.0190        0.0260         -0.0511
## ethnicityBlack                      0.0190        0.0205         -0.0108
## ethnicityChinese                    0.0119        0.0173         -0.0503
## ethnicityMixed                      0.0119        0.0071          0.0442
## ethnicityRefused                    0.0095        0.0071          0.0249
## ethnicityUnknown                    0.0024        0.0008          0.0326
## ethnicityWhite                      0.9264        0.9213          0.0195
## regionLondon                        0.1188        0.1039          0.0458
## regionMidlands & East Anglia        0.2185        0.2764         -0.1400
## regionScotland                      0.1211        0.0764          0.1372
## regionSouth East                    0.1544        0.1472          0.0198
## regionSouth West                    0.0998        0.0906          0.0307
## regionThe North                     0.2399        0.2559         -0.0375
## regionWales                         0.0475        0.0496         -0.0099
##                              Var. Ratio eCDF Mean eCDF Max
## distance                         1.0780    0.1869   0.2890
## genderFemale                          .    0.0198   0.0198
## genderMale                            .    0.0198   0.0198
## age                              0.7302    0.1200   0.2379
## marital_statusDivorced                .    0.0567   0.0567
## marital_statusMarried                 .    0.1871   0.1871
## marital_statusSeparated               .    0.0160   0.0160
## marital_statusSingle                  .    0.1635   0.1635
## marital_statusWidowed                 .    0.0491   0.0491
## ethnicityAsian                        .    0.0070   0.0070
## ethnicityBlack                        .    0.0015   0.0015
## ethnicityChinese                      .    0.0054   0.0054
## ethnicityMixed                        .    0.0048   0.0048
## ethnicityRefused                      .    0.0024   0.0024
## ethnicityUnknown                      .    0.0016   0.0016
## ethnicityWhite                        .    0.0051   0.0051
## regionLondon                          .    0.0148   0.0148
## regionMidlands & East Anglia          .    0.0579   0.0579
## regionScotland                        .    0.0448   0.0448
## regionSouth East                      .    0.0072   0.0072
## regionSouth West                      .    0.0092   0.0092
## regionThe North                       .    0.0160   0.0160
## regionWales                           .    0.0021   0.0021
## 
## Summary of Balance for Matched Data:
##                              Means Treated Means Control Std. Mean Diff.
## distance                            0.3083        0.3074          0.0079
## genderFemale                        0.5558        0.5511          0.0096
## genderMale                          0.4442        0.4489         -0.0096
## age                                42.7150       42.0641          0.0402
## marital_statusDivorced              0.1378        0.1306          0.0207
## marital_statusMarried               0.3397        0.3729         -0.0702
## marital_statusSeparated             0.0523        0.0546         -0.0107
## marital_statusSingle                0.3753        0.3682          0.0147
## marital_statusWidowed               0.0950        0.0736          0.0729
## ethnicityAsian                      0.0190        0.0190          0.0000
## ethnicityBlack                      0.0190        0.0166          0.0174
## ethnicityChinese                    0.0119        0.0048          0.0658
## ethnicityMixed                      0.0119        0.0119          0.0000
## ethnicityRefused                    0.0095        0.0071          0.0245
## ethnicityUnknown                    0.0024        0.0024          0.0000
## ethnicityWhite                      0.9264        0.9382         -0.0455
## regionLondon                        0.1188        0.1116          0.0220
## regionMidlands & East Anglia        0.2185        0.2185          0.0000
## regionScotland                      0.1211        0.1235         -0.0073
## regionSouth East                    0.1544        0.1496          0.0131
## regionSouth West                    0.0998        0.0855          0.0476
## regionThe North                     0.2399        0.2613         -0.0501
## regionWales                         0.0475        0.0499         -0.0112
##                              Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
## distance                         1.0142    0.0019   0.0261          0.0098
## genderFemale                          .    0.0048   0.0048          0.8509
## genderMale                            .    0.0048   0.0048          0.8509
## age                              0.9215    0.0127   0.0404          0.6383
## marital_statusDivorced                .    0.0071   0.0071          0.6272
## marital_statusMarried                 .    0.0333   0.0333          0.5417
## marital_statusSeparated               .    0.0024   0.0024          0.3949
## marital_statusSingle                  .    0.0071   0.0071          0.6623
## marital_statusWidowed                 .    0.0214   0.0214          0.4293
## ethnicityAsian                        .    0.0000   0.0000          0.0380
## ethnicityBlack                        .    0.0024   0.0024          0.2262
## ethnicityChinese                      .    0.0071   0.0071          0.1535
## ethnicityMixed                        .    0.0000   0.0000          0.0238
## ethnicityRefused                      .    0.0024   0.0024          0.1714
## ethnicityUnknown                      .    0.0000   0.0000          0.0048
## ethnicityWhite                        .    0.0119   0.0119          0.4820
## regionLondon                          .    0.0071   0.0071          0.5654
## regionMidlands & East Anglia          .    0.0000   0.0000          0.2185
## regionScotland                        .    0.0024   0.0024          0.4732
## regionSouth East                      .    0.0048   0.0048          0.6442
## regionSouth West                      .    0.0143   0.0143          0.4280
## regionThe North                       .    0.0214   0.0214          0.5507
## regionWales                           .    0.0024   0.0024          0.4132
## 
## Sample Sizes:
##           Control Treated
## All          1270     421
## Matched       421     421
## Unmatched     849       0
## Discarded       0       0
##Create a data frame from matches using the match.data function.
match_data = match.data(match)

#Check the dimensions.
dim(match_data)
## [1] 842  15

Step Four: 新しいサンプルを使って分析を行う。

  • 回帰で使用することができるように、婚姻状態を因子変数に変える。
match_data = match_data %>% 
  mutate(marital_status = as.factor(marital_status))
lm_nocontrols = lm(marital_status ~ smoke, data= match_data)
modelsummary(lm_nocontrols)
tinytable_gz71j46rah98po4fp2v8
(1)
(Intercept) 2.881
smoke 0.069
Num.Obs. 842

コントロールあり、標準誤差もここでは誤り

lm_controls =lm(marital_status ~ smoke+age+gender+ethnicity+marital_status, data=match_data)

modelsummary(lm_controls)
tinytable_vkkgkowjijevchla1qpu
(1)
(Intercept) 3.051
smoke 0.074
age -0.008
genderMale 0.035
ethnicityBlack 0.821
ethnicityChinese -0.252
ethnicityMixed -0.008
ethnicityRefused 0.830
ethnicityUnknown -1.157
ethnicityWhite 0.141
Num.Obs. 842

比較

models <- list(lm_nocontrols, lm_controls)
modelplot(models)