Propensity Score Analysis

Task 1: So sánh nguy cơ gãy xương giữa nam và nữ dùng mô hình hồi quy logistic

Đọc dữ liệu “hip fracture data.csv” vào R và gọi đối tượng là os.

os = read.csv("C:\\Users\\Nguyen\\Desktop\\Workshop NCKH 2019\\Dataset\\Hip fracture data.csv")
os$hipfx = as.factor(os$hipfx)
os = na.omit(os[, c("hipfx", "age", "gender", "v3", "bmi")])
head(os)
##   hipfx age gender    v3 bmi
## 1     0  73   Male 1.079  32
## 2     0  67 Female 0.966  26
## 3     0  68   Male 1.013  26
## 4     0  62 Female 0.839  24
## 5     0  61   Male 0.811  24
## 6     0  76 Female 0.743  28
# So sánh nguy cơ gãy xương giữa nam và nữ: mô hình hồi quy logistic
m = glm(hipfx ~ gender + age, family=binomial, data=os)
summary(m)
## 
## Call:
## glm(formula = hipfx ~ gender + age, family = binomial, data = os)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2012  -0.3574  -0.2718  -0.2270   2.9711  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -10.48742    0.78678 -13.330  < 2e-16 ***
## genderMale   -0.64377    0.18207  -3.536 0.000406 ***
## age           0.11216    0.01055  10.628  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1282.1  on 2715  degrees of freedom
## Residual deviance: 1153.8  on 2713  degrees of freedom
## AIC: 1159.8
## 
## Number of Fisher Scoring iterations: 6

Task 2: So sánh nguy cơ gãy xương giữa nam và nữ dùng PSA

# Gọi package MatchIt và Matching
library(MatchIt)
library(Matching)
## Loading required package: MASS
## ## 
## ##  Matching (Version 4.9-6, Build Date: 2019-04-07)
## ##  See http://sekhon.berkeley.edu/matching for additional documentation.
## ##  Please cite software as:
## ##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
## ##   Software with Automated Balance Optimization: The Matching package for R.''
## ##   Journal of Statistical Software, 42(7): 1-52. 
## ##
# "Tiên lượng" giới tính dùng logistic regression
m1 = glm((gender=="Female") ~ age + v3 + bmi, family=binomial, data=os)

# Ước tính propensity score
os$ps = predict(m1, type="response")
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
histbackback(split(os$ps, os$gender))

# Bắt cặp dựa vào propensity score
listMatch = Match(Tr=(os$gender=="Female"), X=log(os$ps/(1- os$ps)), M=1, caliper=0.05, replace =FALSE, ties=TRUE, version="fast")
## Warning in Match(Tr = (os$gender == "Female"), X = log(os$ps/(1 -
## os$ps)), : replace==FALSE, but there are more (weighted) treated obs than
## control obs. Some treated obs will not be matched. You may want to estimate
## ATC instead.
MatchBalance(formul=(os$gender=="Female") ~ age + v3 + bmi, data=os, match.out=listMatch)
## 
## ***** (V1) age *****
##                        Before Matching        After Matching
## mean treatment........     69.126             69.493 
## mean control..........      69.08             68.933 
## std mean diff.........     0.6754              8.048 
## 
## mean raw eQQ diff.....    0.85377            0.92814 
## med  raw eQQ diff.....          1                  1 
## max  raw eQQ diff.....          4                  4 
## 
## mean eCDF diff........   0.024386           0.028088 
## med  eCDF diff........   0.020801           0.026797 
## max  eCDF diff........   0.063789           0.059683 
## 
## var ratio (Tr/Co).....     1.3592             1.3912 
## T-test p-value........    0.85071           0.089209 
## KS Bootstrap p-value..      0.006              0.066 
## KS Naive p-value......  0.0097083            0.10737 
## KS Statistic..........   0.063789           0.059683 
## 
## 
## ***** (V2) v3 *****
##                        Before Matching        After Matching
## mean treatment........    0.81577            0.87838 
## mean control..........    0.93115            0.88546 
## std mean diff.........    -81.475            -5.0822 
## 
## mean raw eQQ diff.....    0.11565           0.012714 
## med  raw eQQ diff.....      0.115              0.009 
## max  raw eQQ diff.....      0.189               0.12 
## 
## mean eCDF diff........    0.16164           0.019468 
## med  eCDF diff........    0.15863           0.013398 
## max  eCDF diff........    0.33453           0.057247 
## 
## var ratio (Tr/Co).....    0.92458               1.24 
## T-test p-value........ < 2.22e-16           0.012005 
## KS Bootstrap p-value.. < 2.22e-16              0.158 
## KS Naive p-value...... < 2.22e-16            0.13564 
## KS Statistic..........    0.33453           0.057247 
## 
## 
## ***** (V3) bmi *****
##                        Before Matching        After Matching
## mean treatment........     26.427             26.348 
## mean control..........     26.952             26.828 
## std mean diff.........    -10.342            -9.8143 
## 
## mean raw eQQ diff.....    0.98365            0.88429 
## med  raw eQQ diff.....          1                  1 
## max  raw eQQ diff.....          7                  3 
## 
## mean eCDF diff........   0.026427           0.026649 
## med  eCDF diff........  0.0078836           0.010962 
## max  eCDF diff........    0.11246            0.11328 
## 
## var ratio (Tr/Co).....     1.5212             1.3758 
## T-test p-value........  0.0030506           0.027679 
## KS Bootstrap p-value.. < 2.22e-16         < 2.22e-16 
## KS Naive p-value...... 1.2841e-07         5.3194e-05 
## KS Statistic..........    0.11246            0.11328 
## 
## 
## Before Matching Minimum p.value: < 2.22e-16 
## Variable Name(s): v3 bmi  Number(s): 2 3 
## 
## After Matching Minimum p.value: < 2.22e-16 
## Variable Name(s): bmi  Number(s): 3
# Phân tích outcome
psMatch = os[unlist(listMatch[c("index.treated","index.control")]), ]
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:Hmisc':
## 
##     label, label<-, units
## The following objects are masked from 'package:base':
## 
##     units, units<-
table1(~age + v3 + bmi | gender, data=psMatch)
Female
(n=821)
Male
(n=821)
Overall
(n=1642)
age
Mean (SD) 69.5 (6.96) 68.9 (5.90) 69.2 (6.46)
Median [Min, Max] 68.0 [60.0, 93.0] 68.0 [60.0, 89.0] 68.0 [60.0, 93.0]
v3
Mean (SD) 0.878 (0.139) 0.885 (0.125) 0.882 (0.133)
Median [Min, Max] 0.884 [0.449, 1.41] 0.882 [0.468, 1.29] 0.883 [0.449, 1.41]
bmi
Mean (SD) 26.3 (4.89) 26.8 (4.17) 26.6 (4.55)
Median [Min, Max] 26.0 [15.0, 49.0] 26.0 [17.0, 50.0] 26.0 [15.0, 50.0]
m2 = glm(hipfx ~ gender, family=binomial, data=psMatch)
summary(m2)
## 
## Call:
## glm(formula = hipfx ~ gender, family = binomial, data = psMatch)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.3653  -0.3653  -0.3201  -0.3201   2.4482  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.6735     0.1420 -18.825   <2e-16 ***
## genderMale   -0.2722     0.2141  -1.271    0.204    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 720.26  on 1641  degrees of freedom
## Residual deviance: 718.63  on 1640  degrees of freedom
## AIC: 722.63
## 
## Number of Fisher Scoring iterations: 5