library(GGally)
library(ggplot2)
library(haven)
library(MatchIt)
library(cobalt)
library(haven)
library(readxl)
library(writexl)

# Load the study database and turn the treatment indicator (`robotic`) and
# the diagnosis code (`dx`) into factors.
df <- read_excel("DATABASE_for_Vlad.xlsx")
factor_cols <- c("robotic", "dx")
df[factor_cols] <- lapply(df[factor_cols], as.factor)

Matching scenario 1 (df_psm1): nearest neighbor 2:1 w/o caliper

# Scenario 1: nearest-neighbor matching, ratio = 2, no caliper.
df_psm1 <- matchit(
  formula = robotic ~ sex2f + age + bmi30 + pci + dx,
  data = df,
  method = "nearest",
  ratio = 2
)
# Balance tables (all data vs. matched data) and sample sizes.
print(summary(df_psm1))

## 
## Call:
## matchit(formula = robotic ~ sex2f + age + bmi30 + pci + dx, data = df, 
##     method = "nearest", ratio = 2)
## 
## Summary of Balance for All Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.4008        0.1266          1.2399     1.8586    0.3679
## sex2f           1.6667        1.7324         -0.1347     1.1977    0.0329
## age            65.6333       60.7634          0.3798     1.1839    0.1174
## bmi30           0.6000        0.3803          0.4485          .    0.2197
## pci             5.6000       11.0282         -1.2462     0.1969    0.1586
## dx1             0.6667        0.3239          0.7270          .    0.3427
## dx2             0.0000        0.2676         -0.6653          .    0.2676
## dx8             0.1333        0.2817         -0.4364          .    0.1484
## dx9             0.2000        0.1268          0.1831          .    0.0732
##          eCDF Max
## distance   0.6554
## sex2f      0.0657
## age        0.2751
## bmi30      0.2197
## pci        0.3390
## dx1        0.3427
## dx2        0.2676
## dx8        0.1484
## dx9        0.0732
## 
## Summary of Balance for Matched Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.4008        0.2673          0.6036     1.8597    0.1054
## sex2f           1.6667        1.8333         -0.3416     1.6571    0.0833
## age            65.6333       62.9267          0.2111     1.1448    0.0675
## bmi30           0.6000        0.4667          0.2722          .    0.1333
## pci             5.6000        7.2333         -0.3750     0.6228    0.0697
## dx1             0.6667        0.5667          0.2121          .    0.1000
## dx2             0.0000        0.0000          0.0000          .    0.0000
## dx8             0.1333        0.2333         -0.2942          .    0.1000
## dx9             0.2000        0.2000          0.0000          .    0.0000
##          eCDF Max Std. Pair Dist.
## distance   0.3667          0.6401
## sex2f      0.1667          0.8881
## age        0.2000          1.2188
## bmi30      0.1333          1.0887
## pci        0.2333          1.2321
## dx1        0.1000          1.2021
## dx2        0.0000          0.0000
## dx8        0.1000          0.6864
## dx9        0.0000          0.4000
## 
## Sample Sizes:
##           Control Treated
## All            71      15
## Matched        30      15
## Unmatched      41       0
## Discarded       0       0

# Short description of the matchit object (method, distance, sample sizes).
print(df_psm1)

## A matchit object
##  - method: 2:1 nearest neighbor matching without replacement
##  - distance: Propensity score
##              - estimated with logistic regression
##  - number of obs.: 86 (original), 45 (matched)
##  - target estimand: ATT
##  - covariates: sex2f, age, bmi30, pci, dx

# Balance table for the full sample (before matching), rounded to 2 decimals.
round(summary(df_psm1)$sum.all, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.40          0.13            1.24       1.86      0.37
## sex2f             1.67          1.73           -0.13       1.20      0.03
## age              65.63         60.76            0.38       1.18      0.12
## bmi30             0.60          0.38            0.45         NA      0.22
## pci               5.60         11.03           -1.25       0.20      0.16
## dx1               0.67          0.32            0.73         NA      0.34
## dx2               0.00          0.27           -0.67         NA      0.27
## dx8               0.13          0.28           -0.44         NA      0.15
## dx9               0.20          0.13            0.18         NA      0.07
##          eCDF Max Std. Pair Dist.
## distance     0.66              NA
## sex2f        0.07              NA
## age          0.28              NA
## bmi30        0.22              NA
## pci          0.34              NA
## dx1          0.34              NA
## dx2          0.27              NA
## dx8          0.15              NA
## dx9          0.07              NA

# Balance table for the matched sample (after matching), rounded to 2 decimals.
round(summary(df_psm1)$sum.matched, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.40          0.27            0.60       1.86      0.11
## sex2f             1.67          1.83           -0.34       1.66      0.08
## age              65.63         62.93            0.21       1.14      0.07
## bmi30             0.60          0.47            0.27         NA      0.13
## pci               5.60          7.23           -0.37       0.62      0.07
## dx1               0.67          0.57            0.21         NA      0.10
## dx2               0.00          0.00            0.00         NA      0.00
## dx8               0.13          0.23           -0.29         NA      0.10
## dx9               0.20          0.20            0.00         NA      0.00
##          eCDF Max Std. Pair Dist.
## distance     0.37            0.64
## sex2f        0.17            0.89
## age          0.20            1.22
## bmi30        0.13            1.09
## pci          0.23            1.23
## dx1          0.10            1.20
## dx2          0.00            0.00
## dx8          0.10            0.69
## dx9          0.00            0.40

# Percent balance improvement (change in percent). MatchIt only fills the
# `$reduction` component when summary() is called with `improvement = TRUE`;
# with the default the component is NULL.
summary(df_psm1, improvement = TRUE)$reduction

# Sample-size table (all / matched / unmatched / discarded) for the flowchart.
summary(df_psm1)[["nn"]]

##               Control Treated
## All (ESS)          71      15
## All                71      15
## Matched (ESS)      30      15
## Matched            30      15
## Unmatched          41       0
## Discarded           0       0

# Plot the balance summary for scenario 1, then draw one balance plot per
# covariate used in the matching formula.
plot(summary(df_psm1))

for (covariate in c("sex2f", "age", "bmi30", "pci", "dx")) {
  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  print(bal.plot(df_psm1, var.name = covariate))
}

Matching scenario 2 (df_psm2): nearest neighbor 2:1 caliper 0.4 (loose)

# Scenario 2: nearest-neighbor matching, ratio = 2, caliper = 0.4.
df_psm2 <- matchit(
  formula = robotic ~ sex2f + age + bmi30 + pci + dx,
  data = df,
  method = "nearest",
  caliper = 0.4,
  ratio = 2
)
# Balance tables (all data vs. matched data) and sample sizes.
print(summary(df_psm2))

## 
## Call:
## matchit(formula = robotic ~ sex2f + age + bmi30 + pci + dx, data = df, 
##     method = "nearest", caliper = 0.4, ratio = 2)
## 
## Summary of Balance for All Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.4008        0.1266          1.2399     1.8586    0.3679
## sex2f           1.6667        1.7324         -0.1347     1.1977    0.0329
## age            65.6333       60.7634          0.3798     1.1839    0.1174
## bmi30           0.6000        0.3803          0.4485          .    0.2197
## pci             5.6000       11.0282         -1.2462     0.1969    0.1586
## dx1             0.6667        0.3239          0.7270          .    0.3427
## dx2             0.0000        0.2676         -0.6653          .    0.2676
## dx8             0.1333        0.2817         -0.4364          .    0.1484
## dx9             0.2000        0.1268          0.1831          .    0.0732
##          eCDF Max
## distance   0.6554
## sex2f      0.0657
## age        0.2751
## bmi30      0.2197
## pci        0.3390
## dx1        0.3427
## dx2        0.2676
## dx8        0.1484
## dx9        0.0732
## 
## Summary of Balance for Matched Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.3232        0.3164          0.0310     0.9253    0.0217
## sex2f           1.7273        1.7727         -0.0932     1.1756    0.0227
## age            62.5091       60.8091          0.1326     1.0186    0.0618
## bmi30           0.5455        0.5455          0.0000          .    0.0000
## pci             6.5455        5.9545          0.1357     0.7474    0.0496
## dx1             0.6364        0.6364          0.0000          .    0.0000
## dx2             0.0000        0.0000          0.0000          .    0.0000
## dx8             0.1818        0.2273         -0.1337          .    0.0455
## dx9             0.1818        0.1364          0.1136          .    0.0455
##          eCDF Max Std. Pair Dist.
## distance   0.1818          0.1116
## sex2f      0.0455          0.6148
## age        0.2273          1.4055
## bmi30      0.0000          0.4000
## pci        0.2273          1.1709
## dx1        0.0000          0.5000
## dx2        0.0000          0.0000
## dx8        0.0455          1.0296
## dx9        0.0455          0.8750
## 
## Sample Sizes:
##               Control Treated
## All             71.        15
## Matched (ESS)   18.62      11
## Matched         20.        11
## Unmatched       51.         4
## Discarded        0.         0

# Short description of the matchit object (method, distance, sample sizes).
print(df_psm2)

## A matchit object
##  - method: 2:1 nearest neighbor matching without replacement
##  - distance: Propensity score [caliper]
##              - estimated with logistic regression
##  - caliper: <distance> (0.081)
##  - number of obs.: 86 (original), 31 (matched)
##  - target estimand: ATT
##  - covariates: sex2f, age, bmi30, pci, dx

# Balance table for the full sample (before matching), rounded to 2 decimals.
round(summary(df_psm2)$sum.all, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.40          0.13            1.24       1.86      0.37
## sex2f             1.67          1.73           -0.13       1.20      0.03
## age              65.63         60.76            0.38       1.18      0.12
## bmi30             0.60          0.38            0.45         NA      0.22
## pci               5.60         11.03           -1.25       0.20      0.16
## dx1               0.67          0.32            0.73         NA      0.34
## dx2               0.00          0.27           -0.67         NA      0.27
## dx8               0.13          0.28           -0.44         NA      0.15
## dx9               0.20          0.13            0.18         NA      0.07
##          eCDF Max Std. Pair Dist.
## distance     0.66              NA
## sex2f        0.07              NA
## age          0.28              NA
## bmi30        0.22              NA
## pci          0.34              NA
## dx1          0.34              NA
## dx2          0.27              NA
## dx8          0.15              NA
## dx9          0.07              NA

# Balance table for the matched sample (after matching), rounded to 2 decimals.
round(summary(df_psm2)$sum.matched, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.32          0.32            0.03       0.93      0.02
## sex2f             1.73          1.77           -0.09       1.18      0.02
## age              62.51         60.81            0.13       1.02      0.06
## bmi30             0.55          0.55            0.00         NA      0.00
## pci               6.55          5.95            0.14       0.75      0.05
## dx1               0.64          0.64            0.00         NA      0.00
## dx2               0.00          0.00            0.00         NA      0.00
## dx8               0.18          0.23           -0.13         NA      0.05
## dx9               0.18          0.14            0.11         NA      0.05
##          eCDF Max Std. Pair Dist.
## distance     0.18            0.11
## sex2f        0.05            0.61
## age          0.23            1.41
## bmi30        0.00            0.40
## pci          0.23            1.17
## dx1          0.00            0.50
## dx2          0.00            0.00
## dx8          0.05            1.03
## dx9          0.05            0.87

# Percent balance improvement (change in percent). MatchIt only fills the
# `$reduction` component when summary() is called with `improvement = TRUE`;
# with the default the component is NULL.
summary(df_psm2, improvement = TRUE)$reduction

# Sample-size table (all / matched / unmatched / discarded) for the flowchart.
summary(df_psm2)[["nn"]]

##                Control Treated
## All (ESS)     71.00000      15
## All           71.00000      15
## Matched (ESS) 18.61538      11
## Matched       20.00000      11
## Unmatched     51.00000       4
## Discarded      0.00000       0

# Plot the balance summary for scenario 2, then draw one balance plot per
# covariate used in the matching formula.
plot(summary(df_psm2))

for (covariate in c("sex2f", "age", "bmi30", "pci", "dx")) {
  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  print(bal.plot(df_psm2, var.name = covariate))
}

Matching scenario 3 (df_psm3): nearest neighbor 1:1 w/o caliper (greedy)

# Scenario 3: nearest-neighbor matching, ratio = 1, no caliper.
df_psm3 <- matchit(
  formula = robotic ~ sex2f + age + bmi30 + pci + dx,
  data = df,
  method = "nearest",
  ratio = 1
)
# Balance tables (all data vs. matched data) and sample sizes.
print(summary(df_psm3))

## 
## Call:
## matchit(formula = robotic ~ sex2f + age + bmi30 + pci + dx, data = df, 
##     method = "nearest", ratio = 1)
## 
## Summary of Balance for All Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.4008        0.1266          1.2399     1.8586    0.3679
## sex2f           1.6667        1.7324         -0.1347     1.1977    0.0329
## age            65.6333       60.7634          0.3798     1.1839    0.1174
## bmi30           0.6000        0.3803          0.4485          .    0.2197
## pci             5.6000       11.0282         -1.2462     0.1969    0.1586
## dx1             0.6667        0.3239          0.7270          .    0.3427
## dx2             0.0000        0.2676         -0.6653          .    0.2676
## dx8             0.1333        0.2817         -0.4364          .    0.1484
## dx9             0.2000        0.1268          0.1831          .    0.0732
##          eCDF Max
## distance   0.6554
## sex2f      0.0657
## age        0.2751
## bmi30      0.2197
## pci        0.3390
## dx1        0.3427
## dx2        0.2676
## dx8        0.1484
## dx9        0.0732
## 
## Summary of Balance for Matched Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.4008        0.3491          0.2340     1.3982    0.0302
## sex2f           1.6667        1.6667          0.0000     1.0000    0.0000
## age            65.6333       61.8467          0.2953     1.2498    0.1060
## bmi30           0.6000        0.5333          0.1361          .    0.0667
## pci             5.6000        6.0000         -0.0918     0.5700    0.0455
## dx1             0.6667        0.7333         -0.1414          .    0.0667
## dx2             0.0000        0.0000          0.0000          .    0.0000
## dx8             0.1333        0.0667          0.1961          .    0.0667
## dx9             0.2000        0.2000          0.0000          .    0.0000
##          eCDF Max Std. Pair Dist.
## distance   0.2667          0.3055
## sex2f      0.0000          0.5333
## age        0.2667          1.2032
## bmi30      0.0667          0.9526
## pci        0.2000          1.2551
## dx1        0.0667          0.9899
## dx2        0.0000          0.0000
## dx8        0.0667          0.1961
## dx9        0.0000          0.4000
## 
## Sample Sizes:
##           Control Treated
## All            71      15
## Matched        15      15
## Unmatched      56       0
## Discarded       0       0

# Short description of the matchit object (method, distance, sample sizes).
print(df_psm3)

## A matchit object
##  - method: 1:1 nearest neighbor matching without replacement
##  - distance: Propensity score
##              - estimated with logistic regression
##  - number of obs.: 86 (original), 30 (matched)
##  - target estimand: ATT
##  - covariates: sex2f, age, bmi30, pci, dx

# Balance table for the full sample (before matching), rounded to 2 decimals.
round(summary(df_psm3)$sum.all, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.40          0.13            1.24       1.86      0.37
## sex2f             1.67          1.73           -0.13       1.20      0.03
## age              65.63         60.76            0.38       1.18      0.12
## bmi30             0.60          0.38            0.45         NA      0.22
## pci               5.60         11.03           -1.25       0.20      0.16
## dx1               0.67          0.32            0.73         NA      0.34
## dx2               0.00          0.27           -0.67         NA      0.27
## dx8               0.13          0.28           -0.44         NA      0.15
## dx9               0.20          0.13            0.18         NA      0.07
##          eCDF Max Std. Pair Dist.
## distance     0.66              NA
## sex2f        0.07              NA
## age          0.28              NA
## bmi30        0.22              NA
## pci          0.34              NA
## dx1          0.34              NA
## dx2          0.27              NA
## dx8          0.15              NA
## dx9          0.07              NA

# Balance table for the matched sample (after matching), rounded to 2 decimals.
round(summary(df_psm3)$sum.matched, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.40          0.35            0.23       1.40      0.03
## sex2f             1.67          1.67            0.00       1.00      0.00
## age              65.63         61.85            0.30       1.25      0.11
## bmi30             0.60          0.53            0.14         NA      0.07
## pci               5.60          6.00           -0.09       0.57      0.05
## dx1               0.67          0.73           -0.14         NA      0.07
## dx2               0.00          0.00            0.00         NA      0.00
## dx8               0.13          0.07            0.20         NA      0.07
## dx9               0.20          0.20            0.00         NA      0.00
##          eCDF Max Std. Pair Dist.
## distance     0.27            0.31
## sex2f        0.00            0.53
## age          0.27            1.20
## bmi30        0.07            0.95
## pci          0.20            1.26
## dx1          0.07            0.99
## dx2          0.00            0.00
## dx8          0.07            0.20
## dx9          0.00            0.40

# Percent balance improvement (change in percent). MatchIt only fills the
# `$reduction` component when summary() is called with `improvement = TRUE`;
# with the default the component is NULL.
summary(df_psm3, improvement = TRUE)$reduction

# Sample-size table (all / matched / unmatched / discarded) for the flowchart.
summary(df_psm3)[["nn"]]

##               Control Treated
## All (ESS)          71      15
## All                71      15
## Matched (ESS)      15      15
## Matched            15      15
## Unmatched          56       0
## Discarded           0       0

# Plot the balance summary for scenario 3, then draw one balance plot per
# covariate used in the matching formula.
plot(summary(df_psm3))

for (covariate in c("sex2f", "age", "bmi30", "pci", "dx")) {
  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  print(bal.plot(df_psm3, var.name = covariate))
}

Matching scenario 4 (df_psm4): nearest neighbor 1:1 caliper 0.4

# Scenario 4: nearest-neighbor matching, ratio = 1, caliper = 0.4.
df_psm4 <- matchit(
  formula = robotic ~ sex2f + age + bmi30 + pci + dx,
  data = df,
  method = "nearest",
  caliper = 0.4,
  ratio = 1
)
# Balance tables (all data vs. matched data) and sample sizes.
print(summary(df_psm4))

## 
## Call:
## matchit(formula = robotic ~ sex2f + age + bmi30 + pci + dx, data = df, 
##     method = "nearest", caliper = 0.4, ratio = 1)
## 
## Summary of Balance for All Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.4008        0.1266          1.2399     1.8586    0.3679
## sex2f           1.6667        1.7324         -0.1347     1.1977    0.0329
## age            65.6333       60.7634          0.3798     1.1839    0.1174
## bmi30           0.6000        0.3803          0.4485          .    0.2197
## pci             5.6000       11.0282         -1.2462     0.1969    0.1586
## dx1             0.6667        0.3239          0.7270          .    0.3427
## dx2             0.0000        0.2676         -0.6653          .    0.2676
## dx8             0.1333        0.2817         -0.4364          .    0.1484
## dx9             0.2000        0.1268          0.1831          .    0.0732
##          eCDF Max
## distance   0.6554
## sex2f      0.0657
## age        0.2751
## bmi30      0.2197
## pci        0.3390
## dx1        0.3427
## dx2        0.2676
## dx8        0.1484
## dx9        0.0732
## 
## Summary of Balance for Matched Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.3232        0.3253         -0.0093     0.9123    0.0127
## sex2f           1.7273        1.7273          0.0000     1.0000    0.0000
## age            62.5091       63.0182         -0.0397     1.2548    0.0711
## bmi30           0.5455        0.5455          0.0000          .    0.0000
## pci             6.5455        7.0000         -0.1044     0.5821    0.0455
## dx1             0.6364        0.7273         -0.1928          .    0.0909
## dx2             0.0000        0.0000          0.0000          .    0.0000
## dx8             0.1818        0.0909          0.2674          .    0.0909
## dx9             0.1818        0.1818          0.0000          .    0.0000
##          eCDF Max Std. Pair Dist.
## distance   0.0909          0.0893
## sex2f      0.0000          0.3636
## age        0.1818          1.3018
## bmi30      0.0000          0.3636
## pci        0.1818          1.2732
## dx1        0.0909          0.9642
## dx2        0.0000          0.0000
## dx8        0.0909          0.2674
## dx9        0.0000          0.3636
## 
## Sample Sizes:
##           Control Treated
## All            71      15
## Matched        11      11
## Unmatched      60       4
## Discarded       0       0

# Short description of the matchit object (method, distance, sample sizes).
print(df_psm4)

## A matchit object
##  - method: 1:1 nearest neighbor matching without replacement
##  - distance: Propensity score [caliper]
##              - estimated with logistic regression
##  - caliper: <distance> (0.081)
##  - number of obs.: 86 (original), 22 (matched)
##  - target estimand: ATT
##  - covariates: sex2f, age, bmi30, pci, dx

# Balance table for the full sample (before matching), rounded to 2 decimals.
round(summary(df_psm4)$sum.all, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.40          0.13            1.24       1.86      0.37
## sex2f             1.67          1.73           -0.13       1.20      0.03
## age              65.63         60.76            0.38       1.18      0.12
## bmi30             0.60          0.38            0.45         NA      0.22
## pci               5.60         11.03           -1.25       0.20      0.16
## dx1               0.67          0.32            0.73         NA      0.34
## dx2               0.00          0.27           -0.67         NA      0.27
## dx8               0.13          0.28           -0.44         NA      0.15
## dx9               0.20          0.13            0.18         NA      0.07
##          eCDF Max Std. Pair Dist.
## distance     0.66              NA
## sex2f        0.07              NA
## age          0.28              NA
## bmi30        0.22              NA
## pci          0.34              NA
## dx1          0.34              NA
## dx2          0.27              NA
## dx8          0.15              NA
## dx9          0.07              NA

# Balance table for the matched sample (after matching), rounded to 2 decimals.
round(summary(df_psm4)$sum.matched, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.32          0.33           -0.01       0.91      0.01
## sex2f             1.73          1.73            0.00       1.00      0.00
## age              62.51         63.02           -0.04       1.25      0.07
## bmi30             0.55          0.55            0.00         NA      0.00
## pci               6.55          7.00           -0.10       0.58      0.05
## dx1               0.64          0.73           -0.19         NA      0.09
## dx2               0.00          0.00            0.00         NA      0.00
## dx8               0.18          0.09            0.27         NA      0.09
## dx9               0.18          0.18            0.00         NA      0.00
##          eCDF Max Std. Pair Dist.
## distance     0.09            0.09
## sex2f        0.00            0.36
## age          0.18            1.30
## bmi30        0.00            0.36
## pci          0.18            1.27
## dx1          0.09            0.96
## dx2          0.00            0.00
## dx8          0.09            0.27
## dx9          0.00            0.36

# Percent balance improvement (change in percent). MatchIt only fills the
# `$reduction` component when summary() is called with `improvement = TRUE`;
# with the default the component is NULL.
summary(df_psm4, improvement = TRUE)$reduction

# Sample-size table (all / matched / unmatched / discarded) for the flowchart.
summary(df_psm4)[["nn"]]

##               Control Treated
## All (ESS)          71      15
## All                71      15
## Matched (ESS)      11      11
## Matched            11      11
## Unmatched          60       4
## Discarded           0       0

# Plot the balance summary for scenario 4, then draw one balance plot per
# covariate used in the matching formula.
plot(summary(df_psm4))

for (covariate in c("sex2f", "age", "bmi30", "pci", "dx")) {
  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  print(bal.plot(df_psm4, var.name = covariate))
}

Matching scenario 5 (df_psm5): nearest neighbor 1:1 caliper 0.35

# Scenario 5: nearest-neighbor matching, ratio = 1, caliper = 0.35.
df_psm5 <- matchit(
  formula = robotic ~ sex2f + age + bmi30 + pci + dx,
  data = df,
  method = "nearest",
  caliper = 0.35,
  ratio = 1
)
# Balance tables (all data vs. matched data) and sample sizes.
print(summary(df_psm5))

## 
## Call:
## matchit(formula = robotic ~ sex2f + age + bmi30 + pci + dx, data = df, 
##     method = "nearest", caliper = 0.35, ratio = 1)
## 
## Summary of Balance for All Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.4008        0.1266          1.2399     1.8586    0.3679
## sex2f           1.6667        1.7324         -0.1347     1.1977    0.0329
## age            65.6333       60.7634          0.3798     1.1839    0.1174
## bmi30           0.6000        0.3803          0.4485          .    0.2197
## pci             5.6000       11.0282         -1.2462     0.1969    0.1586
## dx1             0.6667        0.3239          0.7270          .    0.3427
## dx2             0.0000        0.2676         -0.6653          .    0.2676
## dx8             0.1333        0.2817         -0.4364          .    0.1484
## dx9             0.2000        0.1268          0.1831          .    0.0732
##          eCDF Max
## distance   0.6554
## sex2f      0.0657
## age        0.2751
## bmi30      0.2197
## pci        0.3390
## dx1        0.3427
## dx2        0.2676
## dx8        0.1484
## dx9        0.0732
## 
## Summary of Balance for Matched Data:
##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance        0.3232        0.3253         -0.0093     0.9123    0.0127
## sex2f           1.7273        1.7273          0.0000     1.0000    0.0000
## age            62.5091       63.0182         -0.0397     1.2548    0.0711
## bmi30           0.5455        0.5455          0.0000          .    0.0000
## pci             6.5455        7.0000         -0.1044     0.5821    0.0455
## dx1             0.6364        0.7273         -0.1928          .    0.0909
## dx2             0.0000        0.0000          0.0000          .    0.0000
## dx8             0.1818        0.0909          0.2674          .    0.0909
## dx9             0.1818        0.1818          0.0000          .    0.0000
##          eCDF Max Std. Pair Dist.
## distance   0.0909          0.0893
## sex2f      0.0000          0.3636
## age        0.1818          1.3018
## bmi30      0.0000          0.3636
## pci        0.1818          1.2732
## dx1        0.0909          0.9642
## dx2        0.0000          0.0000
## dx8        0.0909          0.2674
## dx9        0.0000          0.3636
## 
## Sample Sizes:
##           Control Treated
## All            71      15
## Matched        11      11
## Unmatched      60       4
## Discarded       0       0

# Short description of the matchit object (method, distance, sample sizes).
print(df_psm5)

## A matchit object
##  - method: 1:1 nearest neighbor matching without replacement
##  - distance: Propensity score [caliper]
##              - estimated with logistic regression
##  - caliper: <distance> (0.071)
##  - number of obs.: 86 (original), 22 (matched)
##  - target estimand: ATT
##  - covariates: sex2f, age, bmi30, pci, dx

# Balance table for the full sample (before matching), rounded to 2 decimals.
round(summary(df_psm5)$sum.all, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.40          0.13            1.24       1.86      0.37
## sex2f             1.67          1.73           -0.13       1.20      0.03
## age              65.63         60.76            0.38       1.18      0.12
## bmi30             0.60          0.38            0.45         NA      0.22
## pci               5.60         11.03           -1.25       0.20      0.16
## dx1               0.67          0.32            0.73         NA      0.34
## dx2               0.00          0.27           -0.67         NA      0.27
## dx8               0.13          0.28           -0.44         NA      0.15
## dx9               0.20          0.13            0.18         NA      0.07
##          eCDF Max Std. Pair Dist.
## distance     0.66              NA
## sex2f        0.07              NA
## age          0.28              NA
## bmi30        0.22              NA
## pci          0.34              NA
## dx1          0.34              NA
## dx2          0.27              NA
## dx8          0.15              NA
## dx9          0.07              NA

# Balance table for the matched sample (after matching), rounded to 2 decimals.
round(summary(df_psm5)$sum.matched, 2)

##          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance          0.32          0.33           -0.01       0.91      0.01
## sex2f             1.73          1.73            0.00       1.00      0.00
## age              62.51         63.02           -0.04       1.25      0.07
## bmi30             0.55          0.55            0.00         NA      0.00
## pci               6.55          7.00           -0.10       0.58      0.05
## dx1               0.64          0.73           -0.19         NA      0.09
## dx2               0.00          0.00            0.00         NA      0.00
## dx8               0.18          0.09            0.27         NA      0.09
## dx9               0.18          0.18            0.00         NA      0.00
##          eCDF Max Std. Pair Dist.
## distance     0.09            0.09
## sex2f        0.00            0.36
## age          0.18            1.30
## bmi30        0.00            0.36
## pci          0.18            1.27
## dx1          0.09            0.96
## dx2          0.00            0.00
## dx8          0.09            0.27
## dx9          0.00            0.36

# Percent balance improvement (change in percent). MatchIt only fills the
# `$reduction` component when summary() is called with `improvement = TRUE`;
# with the default the component is NULL.
summary(df_psm5, improvement = TRUE)$reduction

# Sample-size table (all / matched / unmatched / discarded) for the flowchart.
summary(df_psm5)[["nn"]]

##               Control Treated
## All (ESS)          71      15
## All                71      15
## Matched (ESS)      11      11
## Matched            11      11
## Unmatched          60       4
## Discarded           0       0

# Plot the balance summary for scenario 5, then draw one balance plot per
# covariate used in the matching formula.
plot(summary(df_psm5))

for (covariate in c("sex2f", "age", "bmi30", "pci", "dx")) {
  # Explicit print(): ggplot objects are not auto-printed inside a loop.
  print(bal.plot(df_psm5, var.name = covariate))
}

Balance check on Love plot <3

# Collect the matching weights of all five scenarios, one column per
# scenario, so cobalt can compare them in a single balance table.
# NOTE(review): get.w() is expected to return one weight per row of `df`
# for a matchit object, so the former NA padding was a no-op; the check
# below guards that assumption.
weights <- data.frame(
  df_psm1 = get.w(df_psm1),
  df_psm2 = get.w(df_psm2),
  df_psm3 = get.w(df_psm3),
  df_psm4 = get.w(df_psm4),
  df_psm5 = get.w(df_psm5)
)
stopifnot(nrow(weights) == nrow(df))

# Balance table across scenarios.
# - `s.d.denom = "treated"`: without it cobalt picked "treated" for some
#   scenarios and "pooled" for others, so the standardized mean differences
#   were not on a common scale. All scenarios target the ATT.
# - `un = TRUE`: compute the unadjusted (pre-matching) values; love.plot()
#   warned that they were missing.
bal_tab <- bal.tab(robotic ~ sex2f + age + bmi30 + pci + dx,
                   data = df,
                   weights = weights,
                   method = "matching",
                   binary = "std",
                   s.d.denom = "treated",
                   un = TRUE)

# love.plot() asked for 6 shapes (presumably the unadjusted sample plus the
# five scenarios); with only 5 supplied it fell back to its defaults.
love.plot(bal_tab,
          shapes = c("cross", "square", "circle",
                     "triangle", "diamond", "asterisk"),
          line = TRUE,
          threshold = 0.3) +
  scale_color_hue()

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Exporting all

# Export the matched data set of every scenario to its own Excel file and
# print the group sizes as a quick sanity check.
psm_list <- list(df_psm1, df_psm2, df_psm3, df_psm4, df_psm5)
file_names <- paste0("df_psm", seq_along(psm_list), ".xlsx")

for (i in seq_along(psm_list)) {
  out_file <- file_names[i]

  # Matched rows of `df` for scenario i.
  df_matched <- match.data(psm_list[[i]], data = df)

  # Group sizes in the matched sample.
  n_robotic <- sum(df_matched$robotic == 1)
  n_control <- sum(df_matched$robotic == 0)

  cat("For", out_file, ":\n")
  cat("robotic == 1:", n_robotic, "\n")
  cat("robotic == 0:", n_control, "\n")
  cat("Dimensions of matched data:", dim(df_matched), "\n\n")

  write_xlsx(df_matched, out_file)
}

## For df_psm1.xlsx :
## robotic == 1: 15 
## robotic == 0: 30 
## Dimensions of matched data: 45 23 
## 
## For df_psm2.xlsx :
## robotic == 1: 11 
## robotic == 0: 20 
## Dimensions of matched data: 31 23 
## 
## For df_psm3.xlsx :
## robotic == 1: 15 
## robotic == 0: 15 
## Dimensions of matched data: 30 23 
## 
## For df_psm4.xlsx :
## robotic == 1: 11 
## robotic == 0: 11 
## Dimensions of matched data: 22 23 
## 
## For df_psm5.xlsx :
## robotic == 1: 11 
## robotic == 0: 11 
## Dimensions of matched data: 22 23

Robotic CRS/HIPEC PSM

Kovalik

2024-09-04

Matching scenario 1 (df_psm1): nearest neighbor 2:1 w/o caliper

Matching scenario 2 (df_psm2): nearest neighbor 2:1 caliper 0.4 (loose)

Matching scenario 3 (df_psm3): nearest neighbor 1:1 w/o caliper (greedy)

Matching scenario 4 (df_psm4): nearest neighbor 1:1 caliper 0.4

Matching scenario 5 (df_psm5): nearest neighbor 1:1 caliper 0.35

Balance check on Love plot <3

Exporting all