library(compareGroups)
library(MatchIt)
## Warning: package 'MatchIt' was built under R version 4.3.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
ps = read.csv("C:\\Thach\\VN trips\\2026_1Jan\\Data Analysis Workshop_PN Institute\\Data\\ps_data.csv")
head(ps)
## id age sex weight height smoking activity heart hypert diabetes cancer lung
## 1 1 57.0 1 99.0 165.0 0 0 0 1 0 1 1
## 2 2 57.0 1 80.9 160.0 1 1 0 1 0 1 1
## 3 3 76.5 0 75.7 175.9 0 0 0 1 0 1 0
## 4 4 72.0 1 59.0 151.0 0 1 0 0 0 0 0
## 5 5 85.5 0 55.5 157.0 0 0 0 0 0 1 0
## 6 6 48.0 1 94.0 158.5 0 0 0 1 0 0 0
## treat death
## 1 1 0
## 2 1 0
## 3 0 0
## 4 1 0
## 5 0 1
## 6 1 0
ps = ps %>% mutate(sex = as.character(sex),
smoking = as.character(smoking),
activity = as.character(activity),
heart = as.character(heart),
hypert = as.character(hypert),
diabetes = as.character(diabetes),
cancer = as.character(cancer),
lung = as.character(lung),
death.c = as.character(death))
des.1 = compareGroups(treat ~ age + sex + weight + height + smoking + activity + heart + hypert + diabetes + cancer + lung, data = ps)
createTable(des.1)
##
## --------Summary descriptives table by 'treat'---------
##
## ___________________________________________
## 0 1 p.overall
## N=706 N=294
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 67.7 (9.33) 65.8 (9.05) 0.002
## sex: 0.002
## 0 192 (27.2%) 52 (17.7%)
## 1 514 (72.8%) 242 (82.3%)
## weight 70.3 (15.2) 68.8 (14.6) 0.143
## height 162 (9.00) 157 (8.29) <0.001
## smoking: <0.001
## 0 620 (87.8%) 232 (78.9%)
## 1 86 (12.2%) 62 (21.1%)
## activity: 0.152
## 0 332 (47.0%) 123 (41.8%)
## 1 374 (53.0%) 171 (58.2%)
## heart: 0.301
## 0 621 (88.0%) 266 (90.5%)
## 1 85 (12.0%) 28 (9.52%)
## hypert: 0.059
## 0 358 (50.7%) 169 (57.5%)
## 1 348 (49.3%) 125 (42.5%)
## diabetes: 0.004
## 0 602 (85.3%) 271 (92.2%)
## 1 104 (14.7%) 23 (7.82%)
## cancer: 0.288
## 0 586 (83.0%) 235 (79.9%)
## 1 120 (17.0%) 59 (20.1%)
## lung: 0.852
## 0 610 (86.4%) 252 (85.7%)
## 1 96 (13.6%) 42 (14.3%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
m_ps= glm(treat~ age+ sex+ weight+ height+ smoking+ activity+ heart+ hypert+ diabetes+ cancer+ lung, family= binomial(), data= ps)
summary(m_ps)
##
## Call:
## glm(formula = treat ~ age + sex + weight + height + smoking +
## activity + heart + hypert + diabetes + cancer + lung, family = binomial(),
## data = ps)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 19.791727 2.311957 8.561 < 2e-16 ***
## age -0.027460 0.008867 -3.097 0.001955 **
## sex1 -0.960857 0.256015 -3.753 0.000175 ***
## weight 0.021321 0.006436 3.313 0.000923 ***
## height -0.123487 0.013423 -9.200 < 2e-16 ***
## smoking1 0.624629 0.208107 3.001 0.002687 **
## activity1 0.234277 0.153563 1.526 0.127106
## heart1 -0.172813 0.255176 -0.677 0.498259
## hypert1 -0.292339 0.155874 -1.875 0.060726 .
## diabetes1 -0.745411 0.268999 -2.771 0.005587 **
## cancer1 0.447509 0.192421 2.326 0.020036 *
## lung1 0.058243 0.220879 0.264 0.792020
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1211.4 on 999 degrees of freedom
## Residual deviance: 1074.4 on 988 degrees of freedom
## AIC: 1098.4
##
## Number of Fisher Scoring iterations: 4
ps$p.score = predict(m_ps, type = "response")
head(ps)
## id age sex weight height smoking activity heart hypert diabetes cancer lung
## 1 1 57.0 1 99.0 165.0 0 0 0 1 0 1 1
## 2 2 57.0 1 80.9 160.0 1 1 0 1 0 1 1
## 3 3 76.5 0 75.7 175.9 0 0 0 1 0 1 0
## 4 4 72.0 1 59.0 151.0 0 1 0 0 0 0 0
## 5 5 85.5 0 55.5 157.0 0 0 0 0 0 1 0
## 6 6 48.0 1 94.0 158.5 0 0 0 1 0 0 0
## treat death death.c p.score
## 1 1 0 0 0.31315211
## 2 1 0 0 0.57566536
## 3 0 0 0 0.09439966
## 4 1 0 0 0.42543818
## 5 0 1 1 0.42246799
## 6 1 0 0 0.41387229
des.2 = compareGroups(treat ~ p.score + age + sex + weight + height + smoking + activity + heart + hypert + diabetes + cancer + lung, data = ps)
createTable(des.2)
##
## --------Summary descriptives table by 'treat'---------
##
## ___________________________________________
## 0 1 p.overall
## N=706 N=294
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## p.score 0.25 (0.15) 0.39 (0.17) <0.001
## age 67.7 (9.33) 65.8 (9.05) 0.002
## sex: 0.002
## 0 192 (27.2%) 52 (17.7%)
## 1 514 (72.8%) 242 (82.3%)
## weight 70.3 (15.2) 68.8 (14.6) 0.143
## height 162 (9.00) 157 (8.29) <0.001
## smoking: <0.001
## 0 620 (87.8%) 232 (78.9%)
## 1 86 (12.2%) 62 (21.1%)
## activity: 0.152
## 0 332 (47.0%) 123 (41.8%)
## 1 374 (53.0%) 171 (58.2%)
## heart: 0.301
## 0 621 (88.0%) 266 (90.5%)
## 1 85 (12.0%) 28 (9.52%)
## hypert: 0.059
## 0 358 (50.7%) 169 (57.5%)
## 1 348 (49.3%) 125 (42.5%)
## diabetes: 0.004
## 0 602 (85.3%) 271 (92.2%)
## 1 104 (14.7%) 23 (7.82%)
## cancer: 0.288
## 0 586 (83.0%) 235 (79.9%)
## 1 120 (17.0%) 59 (20.1%)
## lung: 0.852
## 0 610 (86.4%) 252 (85.7%)
## 1 96 (13.6%) 42 (14.3%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
ggplot(aes(x= p.score), data = ps) + geom_histogram(color= "white")+ facet_wrap(~ treat) + xlab("Xác suất nhận can thiệp")+ theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
ps$treat.c[ps$treat == 1] = "Treated"
ps$treat.c[ps$treat == 0] = "Control"
ggplot(data = ps, aes(x = p.score, fill = treat.c)) + geom_density(alpha = 0.7) + labs(x = "Propensity score", y = "", title = "Phân bố Propensity score theo nhóm can thiệp") + theme_minimal()
match.out = matchit(treat~ age + sex + weight + height + smoking + activity + heart + hypert + diabetes + cancer + lung, data = ps, method = "nearest", ratio = 1)
summary(match.out, standardize = TRUE)
##
## Call:
## matchit(formula = treat ~ age + sex + weight + height + smoking +
## activity + heart + hypert + diabetes + cancer + lung, data = ps,
## method = "nearest", ratio = 1)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.3885 0.2546 0.7873 1.3359 0.2242
## age 65.7755 67.7252 -0.2154 0.9413 0.0461
## sex0 0.1769 0.2720 -0.2492 . 0.0951
## sex1 0.8231 0.7280 0.2492 . 0.0951
## weight 68.7646 70.2694 -0.1031 0.9272 0.0266
## height 156.7565 161.8436 -0.6133 0.8486 0.1391
## smoking0 0.7891 0.8782 -0.2183 . 0.0891
## smoking1 0.2109 0.1218 0.2183 . 0.0891
## activity0 0.4184 0.4703 -0.1052 . 0.0519
## activity1 0.5816 0.5297 0.1052 . 0.0519
## heart0 0.9048 0.8796 0.0857 . 0.0252
## heart1 0.0952 0.1204 -0.0857 . 0.0252
## hypert0 0.5748 0.5071 0.1370 . 0.0677
## hypert1 0.4252 0.4929 -0.1370 . 0.0677
## diabetes0 0.9218 0.8527 0.2572 . 0.0691
## diabetes1 0.0782 0.1473 -0.2572 . 0.0691
## cancer0 0.7993 0.8300 -0.0767 . 0.0307
## cancer1 0.2007 0.1700 0.0767 . 0.0307
## lung0 0.8571 0.8640 -0.0197 . 0.0069
## lung1 0.1429 0.1360 0.0197 . 0.0069
## eCDF Max
## distance 0.3245
## age 0.1214
## sex0 0.0951
## sex1 0.0951
## weight 0.0621
## height 0.2586
## smoking0 0.0891
## smoking1 0.0891
## activity0 0.0519
## activity1 0.0519
## heart0 0.0252
## heart1 0.0252
## hypert0 0.0677
## hypert1 0.0677
## diabetes0 0.0691
## diabetes1 0.0691
## cancer0 0.0307
## cancer1 0.0307
## lung0 0.0069
## lung1 0.0069
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.3885 0.3637 0.1461 1.4474 0.0221
## age 65.7755 65.9762 -0.0222 0.8747 0.0263
## sex0 0.1769 0.1803 -0.0089 . 0.0034
## sex1 0.8231 0.8197 0.0089 . 0.0034
## weight 68.7646 68.2112 0.0379 0.8734 0.0196
## height 156.7565 157.3483 -0.0714 1.0946 0.0172
## smoking0 0.7891 0.8231 -0.0834 . 0.0340
## smoking1 0.2109 0.1769 0.0834 . 0.0340
## activity0 0.4184 0.4116 0.0138 . 0.0068
## activity1 0.5816 0.5884 -0.0138 . 0.0068
## heart0 0.9048 0.8912 0.0463 . 0.0136
## heart1 0.0952 0.1088 -0.0463 . 0.0136
## hypert0 0.5748 0.5714 0.0069 . 0.0034
## hypert1 0.4252 0.4286 -0.0069 . 0.0034
## diabetes0 0.9218 0.9286 -0.0253 . 0.0068
## diabetes1 0.0782 0.0714 0.0253 . 0.0068
## cancer0 0.7993 0.7993 0.0000 . 0.0000
## cancer1 0.2007 0.2007 0.0000 . 0.0000
## lung0 0.8571 0.8639 -0.0194 . 0.0068
## lung1 0.1429 0.1361 0.0194 . 0.0068
## eCDF Max Std. Pair Dist.
## distance 0.1293 0.1471
## age 0.0782 1.1344
## sex0 0.0034 0.7042
## sex1 0.0034 0.7042
## weight 0.0884 1.1813
## height 0.0612 0.8181
## smoking0 0.0340 0.7004
## smoking1 0.0340 0.7004
## activity0 0.0068 0.9929
## activity1 0.0068 0.9929
## heart0 0.0136 0.6025
## heart1 0.0136 0.6025
## hypert0 0.0034 1.0252
## hypert1 0.0034 1.0252
## diabetes0 0.0068 0.5067
## diabetes1 0.0068 0.5067
## cancer0 0.0000 0.2653
## cancer1 0.0000 0.2653
## lung0 0.0068 0.6610
## lung1 0.0068 0.6610
##
## Sample Sizes:
## Control Treated
## All 706 294
## Matched 294 294
## Unmatched 412 0
## Discarded 0 0
plot(match.out, type = "jitter")
## To identify the units, use first mouse button; to stop, use second.
plot(match.out, type = "hist")
ps.matched = match.data(match.out)
dim(ps.matched)
## [1] 588 20
des.3 = compareGroups(treat ~ age + sex + weight + height + smoking + activity + heart + hypert + diabetes + cancer + lung, data = ps.matched)
createTable(des.3)
##
## --------Summary descriptives table by 'treat'---------
##
## ___________________________________________
## 0 1 p.overall
## N=294 N=294
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 66.0 (9.68) 65.8 (9.05) 0.795
## sex: 1.000
## 0 53 (18.0%) 52 (17.7%)
## 1 241 (82.0%) 242 (82.3%)
## weight 68.2 (15.6) 68.8 (14.6) 0.657
## height 157 (7.93) 157 (8.29) 0.377
## smoking: 0.348
## 0 242 (82.3%) 232 (78.9%)
## 1 52 (17.7%) 62 (21.1%)
## activity: 0.933
## 0 121 (41.2%) 123 (41.8%)
## 1 173 (58.8%) 171 (58.2%)
## heart: 0.683
## 0 262 (89.1%) 266 (90.5%)
## 1 32 (10.9%) 28 (9.52%)
## hypert: 1.000
## 0 168 (57.1%) 169 (57.5%)
## 1 126 (42.9%) 125 (42.5%)
## diabetes: 0.875
## 0 273 (92.9%) 271 (92.2%)
## 1 21 (7.14%) 23 (7.82%)
## cancer: 1.000
## 0 235 (79.9%) 235 (79.9%)
## 1 59 (20.1%) 59 (20.1%)
## lung: 0.905
## 0 254 (86.4%) 252 (85.7%)
## 1 40 (13.6%) 42 (14.3%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
library(cobalt)
## cobalt (Version 4.6.1, Build Date: 2025-08-20)
##
## Attaching package: 'cobalt'
## The following object is masked from 'package:MatchIt':
##
## lalonde
love.plot(match.out,
stats = "mean.diffs",
threshold = 0.1,
abs = TRUE,
stars = "raw",
var.order = "unadjusted",
sample.names = c("Chưa bắt cặp", "Bắt cặp"))
createTable(compareGroups(treat ~ death.c, data = ps.matched))
##
## --------Summary descriptives table by 'treat'---------
##
## __________________________________________
## 0 1 p.overall
## N=294 N=294
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## death.c: 1.000
## 0 247 (84.0%) 246 (83.7%)
## 1 47 (16.0%) 48 (16.3%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
psa.matched = glm(death ~ treat.c, family = binomial(), data = ps.matched)
summary(psa.matched)
##
## Call:
## glm(formula = death ~ treat.c, family = binomial(), data = ps.matched)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.65924 0.15914 -10.426 <2e-16 ***
## treat.cTreated 0.02511 0.22411 0.112 0.911
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 520.09 on 587 degrees of freedom
## Residual deviance: 520.08 on 586 degrees of freedom
## AIC: 524.08
##
## Number of Fisher Scoring iterations: 3
# Unmatched analysis:
createTable(compareGroups(treat ~ death.c, data = ps))
##
## --------Summary descriptives table by 'treat'---------
##
## __________________________________________
## 0 1 p.overall
## N=706 N=294
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## death.c: 0.161
## 0 562 (79.6%) 246 (83.7%)
## 1 144 (20.4%) 48 (16.3%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
unadjusted = glm(death ~ treat.c, family = binomial(), data = ps)
summary(unadjusted)
##
## Call:
## glm(formula = death ~ treat.c, family = binomial(), data = ps)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3617 0.0934 -14.579 <2e-16 ***
## treat.cTreated -0.2724 0.1834 -1.486 0.137
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 978.22 on 999 degrees of freedom
## Residual deviance: 975.95 on 998 degrees of freedom
## AIC: 979.95
##
## Number of Fisher Scoring iterations: 4
library(lessR)
## Warning: package 'lessR' was built under R version 4.3.3
##
## lessR 4.3.9 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read text, Excel, SPSS, SAS, or R data file
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following objects are masked from 'package:dplyr':
##
## recode, rename
Logit(death ~ treat, data = ps.matched)
##
## Response Variable: death
## Predictor Variable 1: treat
##
## Number of cases (rows) of data: 588
## Number of cases retained for analysis: 588
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of death for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -1.6592 0.1591 -10.426 0.000 -1.9711 -1.3473
## treat 0.0251 0.2241 0.112 0.911 -0.4141 0.4643
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.1903 0.1393 0.2599
## treat 1.0254 0.6609 1.5910
##
##
## -- Model Fit
##
## Null deviance: 520.092 on 587 degrees of freedom
## Residual deviance: 520.080 on 586 degrees of freedom
##
## AIC: 524.0797
##
## Number of iterations to convergence: 3
##
##
## ANALYSIS OF RESIDUALS AND INFLUENCE
## Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [res_rows = 20 out of 588 cases (rows) of data]
## --------------------------------------------------------------------
## treat death P(Y=1) residual rstudent dffits cooks
## 5 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 27 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 28 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 29 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 71 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 74 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 89 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 92 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 100 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 121 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 128 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 134 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 136 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 149 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 155 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 216 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 234 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 245 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 249 0 1 0.1599 0.8401 1.92 0.1193 0.008999
## 255 0 1 0.1599 0.8401 1.92 0.1193 0.008999
##
##
## PREDICTION
##
## Probability threshold for classification : 0.5
##
##
## Data, Fitted Values, Standard Errors
## [sorted by fitted value]
## [pred_all=TRUE to see all intervals displayed]
## --------------------------------------------------------------------
## treat death label fitted std.err
## 5 0 1 0 0.1599 0.02137
## 7 0 1 0 0.1599 0.02137
## 12 0 0 0 0.1599 0.02137
## 13 0 0 0 0.1599 0.02137
##
## ... for the rows of data where fitted is close to 0.5 ...
##
## treat death label fitted std.err
## 985 0 0 0 0.1599 0.02137
## 989 0 0 0 0.1599 0.02137
## 1 1 0 0 0.1633 0.02156
## 2 1 0 0 0.1633 0.02156
## 4 1 0 0 0.1633 0.02156
##
## ... for the last 4 rows of sorted data ...
##
## treat death label fitted std.err
## 992 1 0 0 0.1633 0.02156
## 993 1 0 0 0.1633 0.02156
## 995 1 1 0 0.1633 0.02156
## 999 1 1 0 0.1633 0.02156
## --------------------------------------------------------------------
##
##
## ----------------------------
## Specified confusion matrices
## ----------------------------
##
## Probability threshold for predicting : 0.5
## Corresponding cutoff threshold for treat: 66.078
##
## Baseline Predicted
## ---------------------------------------------------
## Total %Tot 0 1 %Correct
## ---------------------------------------------------
## 1 95 16.2 95 0 0.0
## death 0 493 83.8 493 0 100.0
## ---------------------------------------------------
## Total 588 83.8
##
## Accuracy: 83.84
## Sensitivity: 0.00
## Precision: NaN
Logit(death ~ treat, data = ps)
##
## Response Variable: death
## Predictor Variable 1: treat
##
## Number of cases (rows) of data: 1000
## Number of cases retained for analysis: 1000
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of death for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -1.3617 0.0934 -14.579 0.000 -1.5448 -1.1786
## treat -0.2724 0.1834 -1.486 0.137 -0.6318 0.0869
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.2562 0.2134 0.3077
## treat 0.7615 0.5316 1.0908
##
##
## -- Model Fit
##
## Null deviance: 978.220 on 999 degrees of freedom
## Residual deviance: 975.949 on 998 degrees of freedom
##
## AIC: 979.9489
##
## Number of iterations to convergence: 4
##
##
## ANALYSIS OF RESIDUALS AND INFLUENCE
## Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [res_rows = 20 out of 1000 cases (rows) of data]
## --------------------------------------------------------------------
## treat death P(Y=1) residual rstudent dffits cooks
## 17 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 53 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 62 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 111 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 116 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 147 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 183 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 189 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 197 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 201 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 225 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 252 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 263 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 266 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 288 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 308 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 316 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 343 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 346 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 384 1 1 0.1633 0.8367 1.908 0.1128 0.008776
##
##
## PREDICTION
##
## Probability threshold for classification : 0.5
##
##
## Data, Fitted Values, Standard Errors
## [sorted by fitted value]
## [pred_all=TRUE to see all intervals displayed]
## --------------------------------------------------------------------
## treat death label fitted std.err
## 1 1 0 0 0.1633 0.02156
## 2 1 0 0 0.1633 0.02156
## 4 1 0 0 0.1633 0.02156
## 6 1 0 0 0.1633 0.02156
##
## ... for the rows of data where fitted is close to 0.5 ...
##
## treat death label fitted std.err
## 995 1 1 0 0.1633 0.02156
## 999 1 1 0 0.1633 0.02156
## 3 0 0 0 0.2040 0.01516
## 5 0 1 0 0.2040 0.01516
## 7 0 1 0 0.2040 0.01516
##
## ... for the last 4 rows of sorted data ...
##
## treat death label fitted std.err
## 996 0 0 0 0.204 0.01516
## 997 0 0 0 0.204 0.01516
## 998 0 0 0 0.204 0.01516
## 1000 0 1 0 0.204 0.01516
## --------------------------------------------------------------------
##
##
## ----------------------------
## Specified confusion matrices
## ----------------------------
##
## Probability threshold for predicting : 0.5
## Corresponding cutoff threshold for treat: -4.998
##
## Baseline Predicted
## ---------------------------------------------------
## Total %Tot 0 1 %Correct
## ---------------------------------------------------
## 1 192 19.2 192 0 0.0
## death 0 808 80.8 808 0 100.0
## ---------------------------------------------------
## Total 1000 80.8
##
## Accuracy: 80.80
## Sensitivity: 0.00
## Precision: NaN
des.2 = compareGroups(treat ~ p.score + age + sex + weight + height + smoking + activity + heart + hypert + diabetes + cancer + lung, data = ps)
createTable(des.2)
##
## --------Summary descriptives table by 'treat'---------
##
## ___________________________________________
## 0 1 p.overall
## N=706 N=294
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## p.score 0.25 (0.15) 0.39 (0.17) <0.001
## age 67.7 (9.33) 65.8 (9.05) 0.002
## sex: 0.002
## 0 192 (27.2%) 52 (17.7%)
## 1 514 (72.8%) 242 (82.3%)
## weight 70.3 (15.2) 68.8 (14.6) 0.143
## height 162 (9.00) 157 (8.29) <0.001
## smoking: <0.001
## 0 620 (87.8%) 232 (78.9%)
## 1 86 (12.2%) 62 (21.1%)
## activity: 0.152
## 0 332 (47.0%) 123 (41.8%)
## 1 374 (53.0%) 171 (58.2%)
## heart: 0.301
## 0 621 (88.0%) 266 (90.5%)
## 1 85 (12.0%) 28 (9.52%)
## hypert: 0.059
## 0 358 (50.7%) 169 (57.5%)
## 1 348 (49.3%) 125 (42.5%)
## diabetes: 0.004
## 0 602 (85.3%) 271 (92.2%)
## 1 104 (14.7%) 23 (7.82%)
## cancer: 0.288
## 0 586 (83.0%) 235 (79.9%)
## 1 120 (17.0%) 59 (20.1%)
## lung: 0.852
## 0 610 (86.4%) 252 (85.7%)
## 1 96 (13.6%) 42 (14.3%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
psa.adj = glm(death ~ treat + p.score, family = binomial(), data = ps)
summary(psa.adj)
##
## Call:
## glm(formula = death ~ treat + p.score, family = binomial(), data = ps)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.92027 0.15967 -5.764 0.00000000823 ***
## treat -0.04003 0.19664 -0.204 0.83869
## p.score -1.81538 0.55690 -3.260 0.00111 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 978.22 on 999 degrees of freedom
## Residual deviance: 964.82 on 997 degrees of freedom
## AIC: 970.82
##
## Number of Fisher Scoring iterations: 4
unadjusted = glm(death ~ treat, family = binomial(), data = ps)
summary(unadjusted)
##
## Call:
## glm(formula = death ~ treat, family = binomial(), data = ps)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3617 0.0934 -14.579 <0.0000000000000002 ***
## treat -0.2724 0.1834 -1.486 0.137
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 978.22 on 999 degrees of freedom
## Residual deviance: 975.95 on 998 degrees of freedom
## AIC: 979.95
##
## Number of Fisher Scoring iterations: 4
Logit(death ~ treat + p.score, data = ps)
##
## Response Variable: death
## Predictor Variable 1: treat
## Predictor Variable 2: p.score
##
## Number of cases (rows) of data: 1000
## Number of cases retained for analysis: 1000
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of death for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -0.9203 0.1597 -5.764 0.000 -1.2332 -0.6073
## treat -0.0400 0.1966 -0.204 0.839 -0.4254 0.3454
## p.score -1.8154 0.5569 -3.260 0.001 -2.9069 -0.7239
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.3984 0.2914 0.5448
## treat 0.9608 0.6535 1.4125
## p.score 0.1628 0.0546 0.4849
##
##
## -- Model Fit
##
## Null deviance: 978.220 on 999 degrees of freedom
## Residual deviance: 964.820 on 997 degrees of freedom
##
## AIC: 970.8195
##
## Number of iterations to convergence: 4
##
##
## Collinearity
##
## Tolerance VIF
## treat 0.864 1.157
## p.score 0.864 1.157
##
## ANALYSIS OF RESIDUALS AND INFLUENCE
## Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [res_rows = 20 out of 1000 cases (rows) of data]
## --------------------------------------------------------------------
## treat p.score death P(Y=1) residual rstudent dffits cooks
## 883 1 0.83190 1 0.07795 0.9220 2.277 0.1925 0.027573
## 189 1 0.75148 1 0.08911 0.9109 2.213 0.1748 0.020753
## 27 0 0.64782 1 0.10946 0.8905 2.115 0.1682 0.016716
## 540 1 0.69201 1 0.09827 0.9017 2.166 0.1616 0.016598
## 134 0 0.64417 1 0.11010 0.8899 2.112 0.1671 0.016439
## 111 1 0.68003 1 0.10022 0.8998 2.156 0.1590 0.015850
## 886 1 0.63222 1 0.10832 0.8917 2.118 0.1487 0.013155
## 960 1 0.62008 1 0.11047 0.8895 2.108 0.1461 0.012544
## 385 1 0.60668 1 0.11288 0.8871 2.097 0.1434 0.011902
## 249 0 0.57625 1 0.12278 0.8772 2.057 0.1469 0.011810
## 587 0 0.56179 1 0.12563 0.8744 2.045 0.1425 0.010949
## 343 1 0.57700 1 0.11839 0.8816 2.073 0.1375 0.010606
## 62 1 0.03723 1 0.26349 0.7365 1.643 0.1757 0.010421
## 858 1 0.04755 1 0.25988 0.7401 1.651 0.1726 0.010141
## 922 0 0.53152 1 0.13180 0.8682 2.020 0.1333 0.009288
## 988 1 0.09513 1 0.24361 0.7564 1.688 0.1588 0.008930
## 521 1 0.09665 1 0.24310 0.7569 1.690 0.1584 0.008894
## 252 1 0.09988 1 0.24202 0.7580 1.692 0.1575 0.008817
## 995 1 0.51617 1 0.13041 0.8696 2.025 0.1268 0.008466
## 444 1 0.51368 1 0.13092 0.8691 2.023 0.1265 0.008393
##
##
## PREDICTION
##
## Probability threshold for classification : 0.5
##
##
## Data, Fitted Values, Standard Errors
## [sorted by fitted value]
## [pred_all=TRUE to see all intervals displayed]
## --------------------------------------------------------------------
## treat p.score death label fitted std.err
## 86 1 0.8509 0 0 0.07551 0.02226
## 883 1 0.8319 1 0 0.07795 0.02226
## 564 1 0.8153 0 0 0.08015 0.02225
## 282 1 0.8118 0 0 0.08062 0.02225
##
## ... for the rows of data where fitted is close to 0.5 ...
##
## treat p.score death label fitted std.err
## 845 0 0.027726 1 0 0.2748 0.02938
## 716 0 0.025445 0 0 0.2756 0.02963
## 345 0 0.008018 1 0 0.2819 0.03160
## NA NA NA NA NA NA NA
## NA.1 NA NA NA NA NA NA
##
## ... for the last 4 rows of sorted data ...
##
## treat p.score death label fitted std.err
## 434 0 0.031032 0 0 0.2736 0.02902
## 845 0 0.027726 1 0 0.2748 0.02938
## 716 0 0.025445 0 0 0.2756 0.02963
## 345 0 0.008018 1 0 0.2819 0.03160
## --------------------------------------------------------------------
##
##
## ----------------------------
## Specified confusion matrices
## ----------------------------
##
## Probability threshold for predicting : 0.5
##
## Baseline Predicted
## ---------------------------------------------------
## Total %Tot 0 1 %Correct
## ---------------------------------------------------
## 1 192 19.2 192 0 0.0
## death 0 808 80.8 808 0 100.0
## ---------------------------------------------------
## Total 1000 80.8
##
## Accuracy: 80.80
## Sensitivity: 0.00
## Precision: NaN
Logit(death ~ treat, data = ps)
##
## Response Variable: death
## Predictor Variable 1: treat
##
## Number of cases (rows) of data: 1000
## Number of cases retained for analysis: 1000
##
##
## BASIC ANALYSIS
##
## -- Estimated Model of death for the Logit of Reference Group Membership
##
## Estimate Std Err z-value p-value Lower 95% Upper 95%
## (Intercept) -1.3617 0.0934 -14.579 0.000 -1.5448 -1.1786
## treat -0.2724 0.1834 -1.486 0.137 -0.6318 0.0869
##
##
## -- Odds Ratios and Confidence Intervals
##
## Odds Ratio Lower 95% Upper 95%
## (Intercept) 0.2562 0.2134 0.3077
## treat 0.7615 0.5316 1.0908
##
##
## -- Model Fit
##
## Null deviance: 978.220 on 999 degrees of freedom
## Residual deviance: 975.949 on 998 degrees of freedom
##
## AIC: 979.9489
##
## Number of iterations to convergence: 4
##
##
## ANALYSIS OF RESIDUALS AND INFLUENCE
## Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [res_rows = 20 out of 1000 cases (rows) of data]
## --------------------------------------------------------------------
## treat death P(Y=1) residual rstudent dffits cooks
## 17 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 53 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 62 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 111 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 116 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 147 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 183 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 189 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 197 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 201 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 225 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 252 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 263 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 266 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 288 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 308 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 316 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 343 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 346 1 1 0.1633 0.8367 1.908 0.1128 0.008776
## 384 1 1 0.1633 0.8367 1.908 0.1128 0.008776
##
##
## PREDICTION
##
## Probability threshold for classification : 0.5
##
##
## Data, Fitted Values, Standard Errors
## [sorted by fitted value]
## [pred_all=TRUE to see all intervals displayed]
## --------------------------------------------------------------------
## treat death label fitted std.err
## 1 1 0 0 0.1633 0.02156
## 2 1 0 0 0.1633 0.02156
## 4 1 0 0 0.1633 0.02156
## 6 1 0 0 0.1633 0.02156
##
## ... for the rows of data where fitted is close to 0.5 ...
##
## treat death label fitted std.err
## 995 1 1 0 0.1633 0.02156
## 999 1 1 0 0.1633 0.02156
## 3 0 0 0 0.2040 0.01516
## 5 0 1 0 0.2040 0.01516
## 7 0 1 0 0.2040 0.01516
##
## ... for the last 4 rows of sorted data ...
##
## treat death label fitted std.err
## 996 0 0 0 0.204 0.01516
## 997 0 0 0 0.204 0.01516
## 998 0 0 0 0.204 0.01516
## 1000 0 1 0 0.204 0.01516
## --------------------------------------------------------------------
##
##
## ----------------------------
## Specified confusion matrices
## ----------------------------
##
## Probability threshold for predicting : 0.5
## Corresponding cutoff threshold for treat: -4.998
##
## Baseline Predicted
## ---------------------------------------------------
## Total %Tot 0 1 %Correct
## ---------------------------------------------------
## 1 192 19.2 192 0 0.0
## death 0 808 80.8 808 0 100.0
## ---------------------------------------------------
## Total 1000 80.8
##
## Accuracy: 80.80
## Sensitivity: 0.00
## Precision: NaN
PROMPT (10/12/2025): Tôi dự kiến thực hiện phân tích propensity score (Propensity Score Analysis) để so sánh tỉ lệ tử vong (biến “death”) giữa 2 nhóm can thiệp (“treat”). Bạn cung cấp R codes để (i) xây dựng mô hình logistic tiên lượng xác suất thuộc nhóm can thiệp (“treat”) với các biến số gồm age, sex, weight, height, smoking, activity, heart, hypert, diabetes, cancer, lung; (ii) ước tính propensity score cho mỗi cá nhân trong nghiên cứu; (iii) bắt cặp bằng propensity score và so sánh tỉ lệ tử vong (“death”) giữa 2 nhóm bằng hồi qui logistic; và (iv) so sánh tỉ lệ tử vong bằng hồi qui logistic có hiệu chỉnh cho propensity score trong toàn mẫu nghiên cứu
✅ (i) Xây dựng mô hình logistic dự đoán xác suất thuộc nhóm can thiệp (“treat”)
data = read.csv("C:\\Thach\\VN trips\\2026_1Jan\\Data Analysis Workshop_PN Institute\\Data\\ps_data.csv")
# Mô hình logistic để tiên lượng xác suất thuộc nhóm can thiệp
ps_model <- glm(treat ~ age + sex + weight + height + smoking + activity + heart + hypert + diabetes + cancer + lung, data = data, family = binomial)
summary(ps_model)
##
## Call:
## glm(formula = treat ~ age + sex + weight + height + smoking +
## activity + heart + hypert + diabetes + cancer + lung, family = binomial,
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 19.791727 2.311957 8.561 < 0.0000000000000002 ***
## age -0.027460 0.008867 -3.097 0.001955 **
## sex -0.960857 0.256015 -3.753 0.000175 ***
## weight 0.021321 0.006436 3.313 0.000923 ***
## height -0.123487 0.013423 -9.200 < 0.0000000000000002 ***
## smoking 0.624629 0.208107 3.001 0.002687 **
## activity 0.234277 0.153563 1.526 0.127106
## heart -0.172813 0.255176 -0.677 0.498259
## hypert -0.292339 0.155874 -1.875 0.060726 .
## diabetes -0.745411 0.268999 -2.771 0.005587 **
## cancer 0.447509 0.192421 2.326 0.020036 *
## lung 0.058243 0.220879 0.264 0.792020
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1211.4 on 999 degrees of freedom
## Residual deviance: 1074.4 on 988 degrees of freedom
## AIC: 1098.4
##
## Number of Fisher Scoring iterations: 4
✅ (ii) Ước tính propensity score cho mỗi cá nhân
# Ước tính propensity score
data$pscore <- predict(ps_model, type = "response")
# Kiểm tra tóm tắt
summary(data$pscore)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.008018 0.154494 0.269406 0.294000 0.413508 0.850851
✅ (iii) Bắt cặp (matching) theo propensity score + phân tích tỉ lệ tử vong bằng logistic regression Ở đây sử dụng gói MatchIt.
library(MatchIt)
# Thực hiện nearest-neighbor matching 1:1 theo propensity score
m.out <- matchit(treat ~ age + sex + weight + height + smoking + activity + heart + hypert + diabetes + cancer + lung, data = data, method = "nearest", distance = data$pscore)
# Dataset sau khi matching
matched.data <- match.data(m.out)
➤ Hồi qui logistic trên dữ liệu đã matching
# Logistic regression trên mẫu đã matched
model_matched <- glm(death ~ treat, data = matched.data, family = binomial)
summary(model_matched)
##
## Call:
## glm(formula = death ~ treat, family = binomial, data = matched.data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.65924 0.15914 -10.426 <0.0000000000000002 ***
## treat 0.02511 0.22411 0.112 0.911
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 520.09 on 587 degrees of freedom
## Residual deviance: 520.08 on 586 degrees of freedom
## AIC: 524.08
##
## Number of Fisher Scoring iterations: 3
✅ (iv) Phân tích toàn mẫu, điều chỉnh bằng propensity score
Có 2 cách phổ biến:
A. Adjusting by Covariate (PS as covariate)
model_adjust_ps <- glm(death ~ treat + pscore, data = data, family = binomial)
summary(model_adjust_ps)
##
## Call:
## glm(formula = death ~ treat + pscore, family = binomial, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.92027 0.15967 -5.764 0.00000000823 ***
## treat -0.04003 0.19664 -0.204 0.83869
## pscore -1.81538 0.55690 -3.260 0.00111 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 978.22 on 999 degrees of freedom
## Residual deviance: 964.82 on 997 degrees of freedom
## AIC: 970.82
##
## Number of Fisher Scoring iterations: 4
B. PS stratification (tuỳ chọn)
# Chia PS thành 5 quintiles
data$ps_quintile <- cut(data$pscore, breaks = quantile(data$pscore, probs = seq(0,1,0.2)), include.lowest = TRUE)
model_strata <- glm(death ~ treat + ps_quintile, data = data, family = binomial)
summary(model_strata)
##
## Call:
## glm(formula = death ~ treat + ps_quintile, family = binomial,
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.84649 0.15532 -5.450 0.0000000504 ***
## treat -0.00899 0.19779 -0.045 0.963748
## ps_quintile(0.136,0.226] -0.53824 0.23524 -2.288 0.022138 *
## ps_quintile(0.226,0.32] -0.80896 0.25078 -3.226 0.001256 **
## ps_quintile(0.32,0.444] -0.73618 0.24795 -2.969 0.002987 **
## ps_quintile(0.444,0.851] -1.04940 0.27712 -3.787 0.000153 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 978.22 on 999 degrees of freedom
## Residual deviance: 957.28 on 994 degrees of freedom
## AIC: 969.28
##
## Number of Fisher Scoring iterations: 4
PROMPT: Phân tích phân tầng của propensity score (stratified PSA) không phải là hiệu chỉnh biến Propensity score ở dạng quintiles (thay vì continuous) mà cần ước tính ảnh hưởng can thiệp trong từng quintile sau đó ước tính hiệu quả tổng hợp sau phân tầng.
ChatGPT: Bạn hoàn toàn đúng — Stratified PSA ≠ đưa quintile vào mô hình như một biến điều chỉnh. Phân tích đúng phải gồm 2 bước: (i) Ước tính hiệu quả can thiệp riêng trong từng tầng (quintile) (ii) Tổng hợp hiệu quả từ các tầng theo phương pháp meta-analysis (inverse‐variance weighting). Dưới đây là R code hoàn toàn đúng theo chuẩn Stratified Propensity Score Analysis.
✅ Bước 1 — Chia propensity score thành 5 quintiles
✅ Bước 2 — Ước tính ảnh hưởng can thiệp trong TỪNG quintile
✅ Bước 3 — Ước tính hiệu quả tổng hợp sau phân tầng