Đọc dữ liệu “hip fracture data.csv” vào R và gọi đối tượng là os.
os = read.csv("C:\\Users\\Nguyen\\Desktop\\Workshop NCKH 2019\\Dataset\\Hip fracture data.csv")
os$hipfx = as.factor(os$hipfx)
os = na.omit(os[, c("hipfx", "age", "gender", "v3", "bmi")])
head(os)
## hipfx age gender v3 bmi
## 1 0 73 Male 1.079 32
## 2 0 67 Female 0.966 26
## 3 0 68 Male 1.013 26
## 4 0 62 Female 0.839 24
## 5 0 61 Male 0.811 24
## 6 0 76 Female 0.743 28
# So sánh nguy cơ gãy xương giữa nam và nữ: mô hình hồi quy logistic
m = glm(hipfx ~ gender + age, family=binomial, data=os)
summary(m)
##
## Call:
## glm(formula = hipfx ~ gender + age, family = binomial, data = os)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2012 -0.3574 -0.2718 -0.2270 2.9711
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.48742 0.78678 -13.330 < 2e-16 ***
## genderMale -0.64377 0.18207 -3.536 0.000406 ***
## age 0.11216 0.01055 10.628 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1282.1 on 2715 degrees of freedom
## Residual deviance: 1153.8 on 2713 degrees of freedom
## AIC: 1159.8
##
## Number of Fisher Scoring iterations: 6
# Gọi package MatchIt và Matching
library(MatchIt)
library(Matching)
## Loading required package: MASS
## ##
## ## Matching (Version 4.9-6, Build Date: 2019-04-07)
## ## See http://sekhon.berkeley.edu/matching for additional documentation.
## ## Please cite software as:
## ## Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
## ## Software with Automated Balance Optimization: The Matching package for R.''
## ## Journal of Statistical Software, 42(7): 1-52.
## ##
# "Tiên lượng" giới tính dùng logistic regression
m1 = glm((gender=="Female") ~ age + v3 + bmi, family=binomial, data=os)
# Ước tính propensity score
os$ps = predict(m1, type="response")
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
histbackback(split(os$ps, os$gender))
# Bắt cặp dựa vào propensity score
listMatch = Match(Tr=(os$gender=="Female"), X=log(os$ps/(1- os$ps)), M=1, caliper=0.05, replace =FALSE, ties=TRUE, version="fast")
## Warning in Match(Tr = (os$gender == "Female"), X = log(os$ps/(1 -
## os$ps)), : replace==FALSE, but there are more (weighted) treated obs than
## control obs. Some treated obs will not be matched. You may want to estimate
## ATC instead.
MatchBalance(formul=(os$gender=="Female") ~ age + v3 + bmi, data=os, match.out=listMatch)
##
## ***** (V1) age *****
## Before Matching After Matching
## mean treatment........ 69.126 69.493
## mean control.......... 69.08 68.933
## std mean diff......... 0.6754 8.048
##
## mean raw eQQ diff..... 0.85377 0.92814
## med raw eQQ diff..... 1 1
## max raw eQQ diff..... 4 4
##
## mean eCDF diff........ 0.024386 0.028088
## med eCDF diff........ 0.020801 0.026797
## max eCDF diff........ 0.063789 0.059683
##
## var ratio (Tr/Co)..... 1.3592 1.3912
## T-test p-value........ 0.85071 0.089209
## KS Bootstrap p-value.. 0.006 0.066
## KS Naive p-value...... 0.0097083 0.10737
## KS Statistic.......... 0.063789 0.059683
##
##
## ***** (V2) v3 *****
## Before Matching After Matching
## mean treatment........ 0.81577 0.87838
## mean control.......... 0.93115 0.88546
## std mean diff......... -81.475 -5.0822
##
## mean raw eQQ diff..... 0.11565 0.012714
## med raw eQQ diff..... 0.115 0.009
## max raw eQQ diff..... 0.189 0.12
##
## mean eCDF diff........ 0.16164 0.019468
## med eCDF diff........ 0.15863 0.013398
## max eCDF diff........ 0.33453 0.057247
##
## var ratio (Tr/Co)..... 0.92458 1.24
## T-test p-value........ < 2.22e-16 0.012005
## KS Bootstrap p-value.. < 2.22e-16 0.158
## KS Naive p-value...... < 2.22e-16 0.13564
## KS Statistic.......... 0.33453 0.057247
##
##
## ***** (V3) bmi *****
## Before Matching After Matching
## mean treatment........ 26.427 26.348
## mean control.......... 26.952 26.828
## std mean diff......... -10.342 -9.8143
##
## mean raw eQQ diff..... 0.98365 0.88429
## med raw eQQ diff..... 1 1
## max raw eQQ diff..... 7 3
##
## mean eCDF diff........ 0.026427 0.026649
## med eCDF diff........ 0.0078836 0.010962
## max eCDF diff........ 0.11246 0.11328
##
## var ratio (Tr/Co)..... 1.5212 1.3758
## T-test p-value........ 0.0030506 0.027679
## KS Bootstrap p-value.. < 2.22e-16 < 2.22e-16
## KS Naive p-value...... 1.2841e-07 5.3194e-05
## KS Statistic.......... 0.11246 0.11328
##
##
## Before Matching Minimum p.value: < 2.22e-16
## Variable Name(s): v3 bmi Number(s): 2 3
##
## After Matching Minimum p.value: < 2.22e-16
## Variable Name(s): bmi Number(s): 3
# Phân tích outcome
psMatch = os[unlist(listMatch[c("index.treated","index.control")]), ]
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:Hmisc':
##
## label, label<-, units
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~age + v3 + bmi | gender, data=psMatch)
| Female (n=821) |
Male (n=821) |
Overall (n=1642) |
|
|---|---|---|---|
| age | |||
| Mean (SD) | 69.5 (6.96) | 68.9 (5.90) | 69.2 (6.46) |
| Median [Min, Max] | 68.0 [60.0, 93.0] | 68.0 [60.0, 89.0] | 68.0 [60.0, 93.0] |
| v3 | |||
| Mean (SD) | 0.878 (0.139) | 0.885 (0.125) | 0.882 (0.133) |
| Median [Min, Max] | 0.884 [0.449, 1.41] | 0.882 [0.468, 1.29] | 0.883 [0.449, 1.41] |
| bmi | |||
| Mean (SD) | 26.3 (4.89) | 26.8 (4.17) | 26.6 (4.55) |
| Median [Min, Max] | 26.0 [15.0, 49.0] | 26.0 [17.0, 50.0] | 26.0 [15.0, 50.0] |
m2 = glm(hipfx ~ gender, family=binomial, data=psMatch)
summary(m2)
##
## Call:
## glm(formula = hipfx ~ gender, family = binomial, data = psMatch)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3653 -0.3653 -0.3201 -0.3201 2.4482
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.6735 0.1420 -18.825 <2e-16 ***
## genderMale -0.2722 0.2141 -1.271 0.204
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 720.26 on 1641 degrees of freedom
## Residual deviance: 718.63 on 1640 degrees of freedom
## AIC: 722.63
##
## Number of Fisher Scoring iterations: 5