https://datascienceplus.com/how-to-use-r-for-matching-samples-propensity-score/

PSA


pacman::p_load(knitr, wakefield, MatchIt, tableone, captioner)

0.0.0.1

library(wakefield)

0.1 create sample data and popuation data

0.1.1 sample data

set.seed(1234)
df.patients <- r_data_frame(n = 250, age(x = 30:78,   name = 'Age'),  sex(x = c("Male", "Female"),  prob = c(0.70, 0.30),  name = "Sex"))
## Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
df.patients$Sample <- as.factor('Patients')

0.1.2 population data

set.seed(1234)
df.population <- r_data_frame(n = 1000, 
                              age(x = 18:80, 
                                  name = 'Age'), 
                              sex(x = c("Male", "Female"), 
                                  prob = c(0.50, 0.50), 
                                  name = "Sex"))
df.population$Sample <- as.factor('Population')

0.1.3 merge data

mydata <- rbind(df.patients, df.population)

mydata$Group <- as.logical(mydata$Sample == 'Patients')
mydata$Distress <- ifelse(mydata$Sex == 'Male', age(nrow(mydata), x = 0:42, name = 'Distress'),
                                                age(nrow(mydata), x = 15:42, name = 'Distress'))

0.1.4

library(pacman)
## Warning: package 'pacman' was built under R version 4.0.3

0.2 Table 1: Comparison of unmatched samples

pacman::p_load(tableone)
table1 <- CreateTableOne(vars = c('Age', 'Sex', 'Distress'), 
                         data = mydata, 
                         factorVars = 'Sex', 
                         strata = 'Sample')
table1 <- print(table1, 
                printToggle = FALSE, 
                noSpaces = TRUE)
kable(table1[,1:3],  
      align = 'c', 
      caption = 'Table 1: Comparison of unmatched samples')
Table 1: Comparison of unmatched samples
Patients Population p
n 250 1000
Age (mean (SD)) 53.51 (13.86) 49.06 (17.77) <0.001
Sex = Female (%) 87 (34.8) 513 (51.3) <0.001
Distress (mean (SD)) 23.53 (11.94) 25.44 (10.97) 0.015

0.3 matching

set.seed(1234)

match.it <- matchit(Group ~ Age + Sex, data = mydata, method="nearest", ratio=1)

a <- summary(match.it)
kable(a$nn, digits = 2, align = 'c', 
      caption = 'Table 2: Sample sizes')
Table 2: Sample sizes
Control Treated
All 1000 250
Matched 250 250
Unmatched 750 0
Discarded 0 0
kable(a$sum.matched[c(1,2,4)], digits = 2, align = 'c', 
      caption = 'Table 3: Summary of balance for matched data')
Table 3: Summary of balance for matched data
Means Treated Means Control Mean Diff
distance 0.22 0.22 0.00
Age 53.51 53.47 0.04
SexMale 0.65 0.65 0.00
SexFemale 0.35 0.35 0.00
plot(match.it, type = 'jitter', interactive = FALSE)

0.4 save new dataset

df.match <- match.data(match.it)[1:ncol(mydata)]
rm(df.patients, df.population)

0.5 new result

pacman::p_load(tableone)
table4 <- CreateTableOne(vars = c('Age', 'Sex', 'Distress'), 
                         data = df.match, 
                         factorVars = 'Sex', 
                         strata = 'Sample')
table4 <- print(table4, 
                printToggle = FALSE, 
                noSpaces = TRUE)
kable(table4[,1:3],  
      align = 'c', 
      caption = 'Table 4: Comparison of matched samples')
Table 4: Comparison of matched samples
Patients Population p
n 250 250
Age (mean (SD)) 53.51 (13.86) 53.47 (13.85) 0.974
Sex = Female (%) 87 (34.8) 87 (34.8) 1.000
Distress (mean (SD)) 23.53 (11.94) 23.68 (11.85) 0.884
LS0tDQp0aXRsZTogIlByb3BlbnNpdHkgU2NvcmUgQW5hbHlzaXMgKFBTQSkiDQphdXRob3I6ICJCaW5oIFRoYW5nIFRyYW4iDQpkYXRlOiAiT2N0LzIzLzIwMjAiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgY29kZV9kb3dubG9hZDogeWVzDQogICAgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgbnVtYmVyX3NlY3Rpb25zOiB5ZXMNCiAgICB0aGVtZTogam91cm5hbA0KICAgIHRvYzogeWVzDQogICAgdG9jX2Zsb2F0OiB5ZXMNCiAgd29yZF9kb2N1bWVudDoNCiAgICB0b2M6IHllcw0KLS0tDQoNCg0KaHR0cHM6Ly9kYXRhc2NpZW5jZXBsdXMuY29tL2hvdy10by11c2Utci1mb3ItbWF0Y2hpbmctc2FtcGxlcy1wcm9wZW5zaXR5LXNjb3JlLw0KDQoNCg0KDQohW1BTQV0oaHR0cHM6Ly93d3cuc3RhdGlzdGljc2hvd3RvLmNvbS93cC1jb250ZW50L3VwbG9hZHMvMjAxNS8wOS9yYW5kb21pemVkLWNvbnRyb2xsZWQtdHJpYWwucG5nKQ0KDQotLS0NCg0KYGBge3J9DQpwYWNtYW46OnBfbG9hZChrbml0ciwgd2FrZWZpZWxkLCBNYXRjaEl0LCB0YWJsZW9uZSwgY2FwdGlvbmVyKQ0KYGBgDQoNCg0KIyMjIyANCmBgYHtyfQ0KbGlicmFyeSh3YWtlZmllbGQpDQoNCmBgYA0KDQoNCiMjIGNyZWF0ZSBzYW1wbGUgZGF0YSBhbmQgcG9wdWF0aW9uIGRhdGENCg0KIyMjIHNhbXBsZSBkYXRhIA0KYGBge3J9DQpzZXQuc2VlZCgxMjM0KQ0KZGYucGF0aWVudHMgPC0gcl9kYXRhX2ZyYW1lKG4gPSAyNTAsIGFnZSh4ID0gMzA6NzgsICAgbmFtZSA9ICdBZ2UnKSwgIHNleCh4ID0gYygiTWFsZSIsICJGZW1hbGUiKSwgIHByb2IgPSBjKDAuNzAsIDAuMzApLCAgbmFtZSA9ICJTZXgiKSkNCg0KZGYucGF0aWVudHMkU2FtcGxlIDwtIGFzLmZhY3RvcignUGF0aWVudHMnKQ0KDQpgYGANCg0KDQojIyMgcG9wdWxhdGlvbiBkYXRhDQoNCg0KYGBge3J9DQpzZXQuc2VlZCgxMjM0KQ0KZGYucG9wdWxhdGlvbiA8LSByX2RhdGFfZnJhbWUobiA9IDEwMDAsIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWdlKHggPSAxODo4MCwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbmFtZSA9ICdBZ2UnKSwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzZXgoeCA9IGMoIk1hbGUiLCAiRmVtYWxlIiksIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHByb2IgPSBjKDAuNTAsIDAuNTApLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBuYW1lID0gIlNleCIpKQ0KZGYucG9wdWxhdGlvbiRTYW1wbGUgPC0gYXMuZmFjdG9yKCdQb3B1bGF0aW9uJykNCmBgYA0KDQoNCiMjIyBtZXJnZSBkYXRhIA0KDQpgYGB7cn0NCm15ZGF0YSA8LSByYmluZChkZi5wYXRpZW50cywgZGYucG9wdWxhdGlvbikNCg0KbXlkYXRhJEdyb3VwIDwtIGFzLmxvZ2ljYWwobXlkYXRhJFNhbXBsZSA9PSAnUGF0aWVudHMnKQ0KbXlkYXRhJERpc3RyZXNzIDwtIGlmZWxzZShteWRhdGEkU2V4ID09ICdNYWxlJywgYWdlKG5yb3cobXlkYXRhKSwgeCA9IDA6NDIsIG5hbWUgPSAnRGlzdHJlc3MnKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFnZShucm93KG15ZGF0YSksIHggPSAxNTo0MiwgbmFtZSA9ICdEaXN0cmVzcycpKQ0KYGBgDQoNCg0KIyMjDQoNCmBgYHtyfQ0KbGlicmFyeShwYWNtYW4pDQpgYGANCg0KIyMgVGFibGUgMTogQ29tcGFyaXNvbiBvZiB1bm1hdGNoZWQgc2FtcGxlcw0KDQpgYGB7cn0NCnBhY21hbjo6cF9sb2FkKHRhYmxlb25lKQ0KdGFibGUxIDwtIENyZWF0ZVRhYmxlT25lKHZhcnMgPSBjKCdBZ2UnLCAnU2V4JywgJ0Rpc3RyZXNzJyksIA0KICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSBteWRhdGEsIA0KICAgICAgICAgICAgICAgICAgICAgICAgIGZhY3RvclZhcnMgPSAnU2V4JywgDQogICAgICAgICAgICAgICAgICAgICAgICAgc3RyYXRhID0gJ1NhbXBsZScpDQp0YWJsZTEgPC0gcHJpbnQodGFibGUxLCANCiAgICAgICAgICAgICAgICBwcmludFRvZ2dsZSA9IEZBTFNFLCANCiAgICAgICAgICAgICAgICBub1NwYWNlcyA9IFRSVUUpDQprYWJsZSh0YWJsZTFbLDE6M10sICANCiAgICAgIGFsaWduID0gJ2MnLCANCiAgICAgIGNhcHRpb24gPSAnVGFibGUgMTogQ29tcGFyaXNvbiBvZiB1bm1hdGNoZWQgc2FtcGxlcycpDQpgYGANCg0KDQoNCiMjIG1hdGNoaW5nDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMTIzNCkNCg0KbWF0Y2guaXQgPC0gbWF0Y2hpdChHcm91cCB+IEFnZSArIFNleCwgZGF0YSA9IG15ZGF0YSwgbWV0aG9kPSJuZWFyZXN0IiwgcmF0aW89MSkNCg0KYSA8LSBzdW1tYXJ5KG1hdGNoLml0KQ0KYGBgDQoNCmBgYHtyfQ0Ka2FibGUoYSRubiwgZGlnaXRzID0gMiwgYWxpZ24gPSAnYycsIA0KICAgICAgY2FwdGlvbiA9ICdUYWJsZSAyOiBTYW1wbGUgc2l6ZXMnKQ0KYGBgDQoNCg0KYGBge3J9DQprYWJsZShhJHN1bS5tYXRjaGVkW2MoMSwyLDQpXSwgZGlnaXRzID0gMiwgYWxpZ24gPSAnYycsIA0KICAgICAgY2FwdGlvbiA9ICdUYWJsZSAzOiBTdW1tYXJ5IG9mIGJhbGFuY2UgZm9yIG1hdGNoZWQgZGF0YScpDQpgYGANCg0KYGBge3J9DQpwbG90KG1hdGNoLml0LCB0eXBlID0gJ2ppdHRlcicsIGludGVyYWN0aXZlID0gRkFMU0UpDQpgYGANCg0KDQoNCiMjIHNhdmUgbmV3IGRhdGFzZXQNCg0KYGBge3J9DQpkZi5tYXRjaCA8LSBtYXRjaC5kYXRhKG1hdGNoLml0KVsxOm5jb2wobXlkYXRhKV0NCnJtKGRmLnBhdGllbnRzLCBkZi5wb3B1bGF0aW9uKQ0KYGBgDQoNCg0KIyMgbmV3IHJlc3VsdA0KDQpgYGB7cn0NCnBhY21hbjo6cF9sb2FkKHRhYmxlb25lKQ0KdGFibGU0IDwtIENyZWF0ZVRhYmxlT25lKHZhcnMgPSBjKCdBZ2UnLCAnU2V4JywgJ0Rpc3RyZXNzJyksIA0KICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSBkZi5tYXRjaCwgDQogICAgICAgICAgICAgICAgICAgICAgICAgZmFjdG9yVmFycyA9ICdTZXgnLCANCiAgICAgICAgICAgICAgICAgICAgICAgICBzdHJhdGEgPSAnU2FtcGxlJykNCnRhYmxlNCA8LSBwcmludCh0YWJsZTQsIA0KICAgICAgICAgICAgICAgIHByaW50VG9nZ2xlID0gRkFMU0UsIA0KICAgICAgICAgICAgICAgIG5vU3BhY2VzID0gVFJVRSkNCmthYmxlKHRhYmxlNFssMTozXSwgIA0KICAgICAgYWxpZ24gPSAnYycsIA0KICAgICAgY2FwdGlvbiA9ICdUYWJsZSA0OiBDb21wYXJpc29uIG9mIG1hdGNoZWQgc2FtcGxlcycpDQpgYGANCg0K