https://datascienceplus.com/how-to-use-r-for-matching-samples-propensity-score/
pacman::p_load(knitr, wakefield, MatchIt, tableone, captioner)
create sample data and popuation data
sample data
set.seed(1234)
df.patients <- r_data_frame(n = 250, age(x = 30:78, name = 'Age'), sex(x = c("Male", "Female"), prob = c(0.70, 0.30), name = "Sex"))
## Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
df.patients$Sample <- as.factor('Patients')
population data
set.seed(1234)
df.population <- r_data_frame(n = 1000,
age(x = 18:80,
name = 'Age'),
sex(x = c("Male", "Female"),
prob = c(0.50, 0.50),
name = "Sex"))
df.population$Sample <- as.factor('Population')
merge data
mydata <- rbind(df.patients, df.population)
mydata$Group <- as.logical(mydata$Sample == 'Patients')
mydata$Distress <- ifelse(mydata$Sex == 'Male', age(nrow(mydata), x = 0:42, name = 'Distress'),
age(nrow(mydata), x = 15:42, name = 'Distress'))
library(pacman)
## Warning: package 'pacman' was built under R version 4.0.3
Table 1: Comparison of unmatched samples
pacman::p_load(tableone)
table1 <- CreateTableOne(vars = c('Age', 'Sex', 'Distress'),
data = mydata,
factorVars = 'Sex',
strata = 'Sample')
table1 <- print(table1,
printToggle = FALSE,
noSpaces = TRUE)
kable(table1[,1:3],
align = 'c',
caption = 'Table 1: Comparison of unmatched samples')
Table 1: Comparison of unmatched samples
| n |
250 |
1000 |
|
| Age (mean (SD)) |
53.51 (13.86) |
49.06 (17.77) |
<0.001 |
| Sex = Female (%) |
87 (34.8) |
513 (51.3) |
<0.001 |
| Distress (mean (SD)) |
23.53 (11.94) |
25.44 (10.97) |
0.015 |
matching
set.seed(1234)
match.it <- matchit(Group ~ Age + Sex, data = mydata, method="nearest", ratio=1)
a <- summary(match.it)
kable(a$nn, digits = 2, align = 'c',
caption = 'Table 2: Sample sizes')
Table 2: Sample sizes
| All |
1000 |
250 |
| Matched |
250 |
250 |
| Unmatched |
750 |
0 |
| Discarded |
0 |
0 |
kable(a$sum.matched[c(1,2,4)], digits = 2, align = 'c',
caption = 'Table 3: Summary of balance for matched data')
Table 3: Summary of balance for matched data
| distance |
0.22 |
0.22 |
0.00 |
| Age |
53.51 |
53.47 |
0.04 |
| SexMale |
0.65 |
0.65 |
0.00 |
| SexFemale |
0.35 |
0.35 |
0.00 |
plot(match.it, type = 'jitter', interactive = FALSE)

save new dataset
df.match <- match.data(match.it)[1:ncol(mydata)]
rm(df.patients, df.population)
new result
pacman::p_load(tableone)
table4 <- CreateTableOne(vars = c('Age', 'Sex', 'Distress'),
data = df.match,
factorVars = 'Sex',
strata = 'Sample')
table4 <- print(table4,
printToggle = FALSE,
noSpaces = TRUE)
kable(table4[,1:3],
align = 'c',
caption = 'Table 4: Comparison of matched samples')
Table 4: Comparison of matched samples
| n |
250 |
250 |
|
| Age (mean (SD)) |
53.51 (13.86) |
53.47 (13.85) |
0.974 |
| Sex = Female (%) |
87 (34.8) |
87 (34.8) |
1.000 |
| Distress (mean (SD)) |
23.53 (11.94) |
23.68 (11.85) |
0.884 |
LS0tDQp0aXRsZTogIlByb3BlbnNpdHkgU2NvcmUgQW5hbHlzaXMgKFBTQSkiDQphdXRob3I6ICJCaW5oIFRoYW5nIFRyYW4iDQpkYXRlOiAiT2N0LzIzLzIwMjAiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgY29kZV9kb3dubG9hZDogeWVzDQogICAgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgbnVtYmVyX3NlY3Rpb25zOiB5ZXMNCiAgICB0aGVtZTogam91cm5hbA0KICAgIHRvYzogeWVzDQogICAgdG9jX2Zsb2F0OiB5ZXMNCiAgd29yZF9kb2N1bWVudDoNCiAgICB0b2M6IHllcw0KLS0tDQoNCg0KaHR0cHM6Ly9kYXRhc2NpZW5jZXBsdXMuY29tL2hvdy10by11c2Utci1mb3ItbWF0Y2hpbmctc2FtcGxlcy1wcm9wZW5zaXR5LXNjb3JlLw0KDQoNCg0KDQohW1BTQV0oaHR0cHM6Ly93d3cuc3RhdGlzdGljc2hvd3RvLmNvbS93cC1jb250ZW50L3VwbG9hZHMvMjAxNS8wOS9yYW5kb21pemVkLWNvbnRyb2xsZWQtdHJpYWwucG5nKQ0KDQotLS0NCg0KYGBge3J9DQpwYWNtYW46OnBfbG9hZChrbml0ciwgd2FrZWZpZWxkLCBNYXRjaEl0LCB0YWJsZW9uZSwgY2FwdGlvbmVyKQ0KYGBgDQoNCg0KIyMjIyANCmBgYHtyfQ0KbGlicmFyeSh3YWtlZmllbGQpDQoNCmBgYA0KDQoNCiMjIGNyZWF0ZSBzYW1wbGUgZGF0YSBhbmQgcG9wdWF0aW9uIGRhdGENCg0KIyMjIHNhbXBsZSBkYXRhIA0KYGBge3J9DQpzZXQuc2VlZCgxMjM0KQ0KZGYucGF0aWVudHMgPC0gcl9kYXRhX2ZyYW1lKG4gPSAyNTAsIGFnZSh4ID0gMzA6NzgsICAgbmFtZSA9ICdBZ2UnKSwgIHNleCh4ID0gYygiTWFsZSIsICJGZW1hbGUiKSwgIHByb2IgPSBjKDAuNzAsIDAuMzApLCAgbmFtZSA9ICJTZXgiKSkNCg0KZGYucGF0aWVudHMkU2FtcGxlIDwtIGFzLmZhY3RvcignUGF0aWVudHMnKQ0KDQpgYGANCg0KDQojIyMgcG9wdWxhdGlvbiBkYXRhDQoNCg0KYGBge3J9DQpzZXQuc2VlZCgxMjM0KQ0KZGYucG9wdWxhdGlvbiA8LSByX2RhdGFfZnJhbWUobiA9IDEwMDAsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWdlKHggPSAxODo4MCwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbmFtZSA9ICdBZ2UnKSwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzZXgoeCA9IGMoIk1hbGUiLCAiRmVtYWxlIiksIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHByb2IgPSBjKDAuNTAsIDAuNTApLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBuYW1lID0gIlNleCIpKQ0KZGYucG9wdWxhdGlvbiRTYW1wbGUgPC0gYXMuZmFjdG9yKCdQb3B1bGF0aW9uJykNCmBgYA0KDQoNCiMjIyBtZXJnZSBkYXRhIA0KDQpgYGB7cn0NCm15ZGF0YSA8LSByYmluZChkZi5wYXRpZW50cywgZGYucG9wdWxhdGlvbikNCg0KbXlkYXRhJEdyb3VwIDwtIGFzLmxvZ2ljYWwobXlkYXRhJFNhbXBsZSA9PSAnUGF0aWVudHMnKQ0KbXlkYXRhJERpc3RyZXNzIDwtIGlmZWxzZShteWRhdGEkU2V4ID09ICdNYWxlJywgYWdlKG5yb3cobXlkYXRhKSwgeCA9IDA6NDIsIG5hbWUgPSAnRGlzdHJlc3MnKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFnZShucm93KG15ZGF0YSksIHggPSAxNTo0MiwgbmFtZSA9ICdEaXN0cmVzcycpKQ0KYGBgDQoNCg0KIyMjDQoNCmBgYHtyfQ0KbGlicmFyeShwYWNtYW4pDQpgYGANCg0KIyMgVGFibGUgMTogQ29tcGFyaXNvbiBvZiB1bm1hdGNoZWQgc2FtcGxlcw0KDQpgYGB7cn0NCnBhY21hbjo6cF9sb2FkKHRhYmxlb25lKQ0KdGFibGUxIDwtIENyZWF0ZVRhYmxlT25lKHZhcnMgPSBjKCdBZ2UnLCAnU2V4JywgJ0Rpc3RyZXNzJyksIA0KICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSBteWRhdGEsIA0KICAgICAgICAgICAgICAgICAgICAgICAgIGZhY3RvclZhcnMgPSAnU2V4JywgDQogICAgICAgICAgICAgICAgICAgICAgICAgc3RyYXRhID0gJ1NhbXBsZScpDQp0YWJsZTEgPC0gcHJpbnQodGFibGUxLCANCiAgICAgICAgICAgICAgICBwcmludFRvZ2dsZSA9IEZBTFNFLCANCiAgICAgICAgICAgICAgICBub1NwYWNlcyA9IFRSVUUpDQprYWJsZSh0YWJsZTFbLDE6M10sICANCiAgICAgIGFsaWduID0gJ2MnLCANCiAgICAgIGNhcHRpb24gPSAnVGFibGUgMTogQ29tcGFyaXNvbiBvZiB1bm1hdGNoZWQgc2FtcGxlcycpDQpgYGANCg0KDQoNCiMjIG1hdGNoaW5nDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMTIzNCkNCg0KbWF0Y2guaXQgPC0gbWF0Y2hpdChHcm91cCB+IEFnZSArIFNleCwgZGF0YSA9IG15ZGF0YSwgbWV0aG9kPSJuZWFyZXN0IiwgcmF0aW89MSkNCg0KYSA8LSBzdW1tYXJ5KG1hdGNoLml0KQ0KYGBgDQoNCmBgYHtyfQ0Ka2FibGUoYSRubiwgZGlnaXRzID0gMiwgYWxpZ24gPSAnYycsIA0KICAgICAgY2FwdGlvbiA9ICdUYWJsZSAyOiBTYW1wbGUgc2l6ZXMnKQ0KYGBgDQoNCg0KYGBge3J9DQprYWJsZShhJHN1bS5tYXRjaGVkW2MoMSwyLDQpXSwgZGlnaXRzID0gMiwgYWxpZ24gPSAnYycsIA0KICAgICAgY2FwdGlvbiA9ICdUYWJsZSAzOiBTdW1tYXJ5IG9mIGJhbGFuY2UgZm9yIG1hdGNoZWQgZGF0YScpDQpgYGANCg0KYGBge3J9DQpwbG90KG1hdGNoLml0LCB0eXBlID0gJ2ppdHRlcicsIGludGVyYWN0aXZlID0gRkFMU0UpDQpgYGANCg0KDQoNCiMjIHNhdmUgbmV3IGRhdGFzZXQNCg0KYGBge3J9DQpkZi5tYXRjaCA8LSBtYXRjaC5kYXRhKG1hdGNoLml0KVsxOm5jb2wobXlkYXRhKV0NCnJtKGRmLnBhdGllbnRzLCBkZi5wb3B1bGF0aW9uKQ0KYGBgDQoNCg0KIyMgbmV3IHJlc3VsdA0KDQpgYGB7cn0NCnBhY21hbjo6cF9sb2FkKHRhYmxlb25lKQ0KdGFibGU0IDwtIENyZWF0ZVRhYmxlT25lKHZhcnMgPSBjKCdBZ2UnLCAnU2V4JywgJ0Rpc3RyZXNzJyksIA0KICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSBkZi5tYXRjaCwgDQogICAgICAgICAgICAgICAgICAgICAgICAgZmFjdG9yVmFycyA9ICdTZXgnLCANCiAgICAgICAgICAgICAgICAgICAgICAgICBzdHJhdGEgPSAnU2FtcGxlJykNCnRhYmxlNCA8LSBwcmludCh0YWJsZTQsIA0KICAgICAgICAgICAgICAgIHByaW50VG9nZ2xlID0gRkFMU0UsIA0KICAgICAgICAgICAgICAgIG5vU3BhY2VzID0gVFJVRSkNCmthYmxlKHRhYmxlNFssMTozXSwgIA0KICAgICAgYWxpZ24gPSAnYycsIA0KICAgICAgY2FwdGlvbiA9ICdUYWJsZSA0OiBDb21wYXJpc29uIG9mIG1hdGNoZWQgc2FtcGxlcycpDQpgYGANCg0K