peter prop scores

The tables below show output from the MatchIt package. The formula is: group ~ age + sex (as a factor) + PSPRS. The “distance” variable is the propensity score. We had to drop ~40 controls because they did not have PSPRS scores.

# Label the transcript, then report covariate balance (all data vs. matched
# data) and sample sizes for the full-sample nearest-neighbour fit.
print("FULL SAMPLE")

## [1] "FULL SAMPLE"

summary(m.out.nearest)

## 
## Call:
## matchit(formula = fmlaMatching, data = df.peter.matchit, method = "nearest", 
##     link = "logit")
## 
## Summary of Balance for All Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.1592        0.1157          0.5759
## Baseline_Age                      66.0000       68.5505         -0.2880
## `as.factor(female0_male1)`0        0.7333        0.5229          0.4758
## `as.factor(female0_male1)`1        0.2667        0.4771         -0.4758
## PSPRSTOT                          33.8000       30.1284          0.2417
##                             Var. Ratio eCDF Mean eCDF Max
## distance                        1.2840    0.1772   0.3529
## Baseline_Age                    1.2755    0.0815   0.2813
## `as.factor(female0_male1)`0          .    0.2104   0.2104
## `as.factor(female0_male1)`1          .    0.2104   0.2104
## PSPRSTOT                        1.2359    0.0645   0.2355
## 
## Summary of Balance for Matched Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.1592        0.1580          0.0159
## Baseline_Age                      66.0000       68.0667         -0.2334
## `as.factor(female0_male1)`0        0.7333        0.8000         -0.1508
## `as.factor(female0_male1)`1        0.2667        0.2000          0.1508
## PSPRSTOT                          33.8000       35.2000         -0.0922
##                             Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
## distance                        1.0387    0.0083   0.0667          0.0248
## Baseline_Age                    1.4940    0.0688   0.2000          0.7302
## `as.factor(female0_male1)`0          .    0.0667   0.0667          0.7538
## `as.factor(female0_male1)`1          .    0.0667   0.0667          0.7538
## PSPRSTOT                        1.2982    0.0575   0.2667          0.8734
## 
## Sample Sizes:
##           Control Treated
## All           109      15
## Matched        15      15
## Unmatched      94       0
## Discarded       0       0

# Label the transcript, then report covariate balance (all data vs. matched
# data) and sample sizes for the fit on the CBS data set.
print("CBS SAMPLE")

## [1] "CBS SAMPLE"

summary(m.out.nearest.CBS)

## 
## Call:
## matchit(formula = fmlaMatching, data = df.peter.matchit.CBS, 
##     method = "nearest", link = "logit")
## 
## Summary of Balance for All Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.1527        0.1367          0.3652
## Baseline_Age                      64.8000       67.2581         -0.2310
## `as.factor(female0_male1)`0        0.6000        0.4839          0.2370
## `as.factor(female0_male1)`1        0.4000        0.5161         -0.2370
## PSPRSTOT                          25.6000       23.9355          0.1742
##                             Var. Ratio eCDF Mean eCDF Max
## distance                        0.8457    0.1294   0.3419
## Baseline_Age                    1.2541    0.1399   0.4129
## `as.factor(female0_male1)`0          .    0.1161   0.1161
## `as.factor(female0_male1)`1          .    0.1161   0.1161
## PSPRSTOT                        0.6966    0.0854   0.2129
## 
## Summary of Balance for Matched Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.1527        0.1556         -0.0680
## Baseline_Age                      64.8000       64.4000          0.0376
## `as.factor(female0_male1)`0        0.6000        0.6000          0.0000
## `as.factor(female0_male1)`1        0.4000        0.4000          0.0000
## PSPRSTOT                          25.6000       27.0000         -0.1465
##                             Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
## distance                        0.9750    0.0278      0.2          0.0682
## Baseline_Age                    7.6486    0.1545      0.4          0.8271
## `as.factor(female0_male1)`0          .    0.0000      0.0          0.4000
## `as.factor(female0_male1)`1          .    0.0000      0.0          0.4000
## PSPRSTOT                        0.3609    0.1200      0.4          1.1512
## 
## Sample Sizes:
##           Control Treated
## All            31       5
## Matched         5       5
## Unmatched      26       0
## Discarded       0       0

# Label the transcript, then report covariate balance (all data vs. matched
# data) and sample sizes for the fit on the PSP data set.
print("PSP SAMPLE")

## [1] "PSP SAMPLE"

summary(m.out.nearest.PSP)

## 
## Call:
## matchit(formula = fmlaMatching, data = df.peter.matchit.PSP, 
##     method = "nearest", link = "logit")
## 
## Summary of Balance for All Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.1741        0.1059          0.6769
## Baseline_Age                      66.6000       69.0641         -0.2935
## `as.factor(female0_male1)`0        0.8000        0.5385          0.6538
## `as.factor(female0_male1)`1        0.2000        0.4615         -0.6538
## PSPRSTOT                          37.9000       32.5897          0.3279
##                             Var. Ratio eCDF Mean eCDF Max
## distance                        1.4917    0.2164   0.4231
## Baseline_Age                    1.4059    0.0954   0.2154
## `as.factor(female0_male1)`0          .    0.2615   0.2615
## `as.factor(female0_male1)`1          .    0.2615   0.2615
## PSPRSTOT                        1.3862    0.0976   0.3128
## 
## Summary of Balance for Matched Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.1741        0.1772         -0.0309
## Baseline_Age                      66.6000       69.1000         -0.2978
## `as.factor(female0_male1)`0        0.8000        1.0000         -0.5000
## `as.factor(female0_male1)`1        0.2000        0.0000          0.5000
## PSPRSTOT                          37.9000       35.8000          0.1297
##                             Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
## distance                        0.9095    0.0116      0.1          0.0836
## Baseline_Age                    1.7291    0.1115      0.3          0.8933
## `as.factor(female0_male1)`0          .    0.2000      0.2          0.5000
## `as.factor(female0_male1)`1          .    0.2000      0.2          0.5000
## PSPRSTOT                        1.4347    0.0804      0.2          0.6606
## 
## Sample Sizes:
##           Control Treated
## All            78      10
## Matched        10      10
## Unmatched      68       0
## Discarded       0       0

Comparison of propensity scores between groups. There was originally an outlier in the treatment group; this participant was removed based on our previous conversation. The age of the participant was 44.

# Jitter plot comparing propensity scores between groups for the full-sample
# fit; interactive = FALSE so the plot is drawn without prompting the user.
print("FULL SAMPLE")

## [1] "FULL SAMPLE"

plot(m.out.nearest, type = "jitter", interactive = FALSE)

# Jitter plot comparing propensity scores between groups for the CBS fit;
# interactive = FALSE so the plot is drawn without prompting the user.
print("CBS SAMPLE")

## [1] "CBS SAMPLE"

plot(m.out.nearest.CBS, type = "jitter", interactive = FALSE)

# Jitter plot comparing propensity scores between groups for the PSP fit;
# interactive = FALSE so the plot is drawn without prompting the user.
print("PSP SAMPLE")

## [1] "PSP SAMPLE"

plot(m.out.nearest.PSP, type = "jitter", interactive = FALSE)

Density plots. Black line = cases, gray line = controls. Left graph is the full dataset, right graph is the matched dataset. To me the matching is better, but it’s not perfect.

# Matching formula: group indicator regressed on baseline age, sex
# (female0_male1 entered as-is rather than wrapped in as.factor()), and
# PSPRS total score.
fmlaMatching <- Rockit_1_4RNTNI_0 ~ Baseline_Age + female0_male1 + PSPRSTOT

# Nearest-neighbour matching on a logit-link propensity score, fitted
# separately on the full data set and on the CBS and PSP data sets.
m.out.nearest <- matchit(
  fmlaMatching,
  data = df.peter.matchit,
  method = "nearest",
  link = "logit"
)
m.out.nearest.CBS <- matchit(
  fmlaMatching,
  data = df.peter.matchit.CBS,
  method = "nearest",
  link = "logit"
)
m.out.nearest.PSP <- matchit(
  fmlaMatching,
  data = df.peter.matchit.PSP,
  method = "nearest",
  link = "logit"
)

# Label the output for the full-sample density plots.
print("FULL SAMPLE")

## [1] "FULL SAMPLE"

# Density plots of each matching covariate for the full-sample fit.
# NOTE(review): the outer print() is what echoes the "A matchit object"
# summary after the figure; confirm the wrapper is intentional.
print(
  plot(
    m.out.nearest,
    type = "density",
    which.xs = ~ Baseline_Age + female0_male1 + PSPRSTOT,
    interactive = FALSE
  )
)

## A matchit object
##  - method: 1:1 nearest neighbor matching without replacement
##  - distance: Propensity score
##              - estimated with logistic regression
##  - number of obs.: 124 (original), 30 (matched)
##  - target estimand: ATT
##  - covariates: Baseline_Age, female0_male1, PSPRSTOT

# Label the output for the CBS density plots.
print("CBS SAMPLE")

## [1] "CBS SAMPLE"

# Density plots of each matching covariate for the CBS fit.
# NOTE(review): the outer print() is what echoes the "A matchit object"
# summary after the figure; confirm the wrapper is intentional.
print(
  plot(
    m.out.nearest.CBS,
    type = "density",
    which.xs = ~ Baseline_Age + female0_male1 + PSPRSTOT,
    interactive = FALSE
  )
)

## A matchit object
##  - method: 1:1 nearest neighbor matching without replacement
##  - distance: Propensity score
##              - estimated with logistic regression
##  - number of obs.: 36 (original), 10 (matched)
##  - target estimand: ATT
##  - covariates: Baseline_Age, female0_male1, PSPRSTOT

# Label the output for the PSP density plots.
print("PSP SAMPLE")

## [1] "PSP SAMPLE"

# Density plots of each matching covariate for the PSP fit.
# NOTE(review): the outer print() is what echoes the "A matchit object"
# summary after the figure; confirm the wrapper is intentional.
print(
  plot(
    m.out.nearest.PSP,
    type = "density",
    which.xs = ~ Baseline_Age + female0_male1 + PSPRSTOT,
    interactive = FALSE
  )
)

## A matchit object
##  - method: 1:1 nearest neighbor matching without replacement
##  - distance: Propensity score
##              - estimated with logistic regression
##  - number of obs.: 88 (original), 20 (matched)
##  - target estimand: ATT
##  - covariates: Baseline_Age, female0_male1, PSPRSTOT

peter prop scores

Mark Sanderson-cimino

2023-06-21