Propensity Score Matching

t = "C:\\Users\\Admin\\Desktop\\rejection.csv"
library(MatchIt)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(gmodels)
duy = read.csv(t)
duy$c0_pl = ifelse(duy$c0 <= 5 , 0, 1)
attach(duy)
head(duy)

##   id sex age   hla     relationship time   c0 creatinin  alb    hct    hb
## 1  1   1  37 6-Jun         Deceased   90  3.2       453 31.4 0.1700  92.0
## 2  2   1  37 2-Jun   Living related   90  7.2       317 37.2 0.2250  81.0
## 3  5   1  44 5-Jun   Living related   90 16.4       546 33.2 0.2700  96.0
## 4  6   1  41 3-Jun Living unrelated   90 10.8       438 35.5 0.2746 112.0
## 5  7   1  36 3-Jun Living unrelated   90  6.3       205 41.3 0.2800  81.0
## 6  9   1  33 3-Jun   Living related   90  8.1       396 38.5 0.2900  97.5
##   rejection     c0_tb creatinin_tb   alb_tb    hct_tb     hb_tb c0_pl
## 1         0  3.720000     201.4000 33.52000 0.1918000  93.55000     0
## 2         0  4.916667     186.8333 38.88333 0.2260667 103.26667     1
## 3         0 10.870000     279.3000 36.72222 0.2700000  97.55556     1
## 4         0  8.530000     171.7000 35.17000 0.2749600 110.40000     1
## 5         0  3.940000     107.2000 39.70000 0.2800000  97.80000     1
## 6         0  9.766667     172.5000 42.66000 0.2900000 114.91667     1

các biến đưa vào mô hình đồng nhất

xvars <- c("sex", "alb", "hct")

Loại các giá trị trống

duy %>% group_by(c0_pl) %>% select(one_of(xvars)) %>% summarise_all(funs(mean(., na.rm = T)))

## Adding missing grouping variables: `c0_pl`

## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.

## # A tibble: 2 x 4
##   c0_pl   sex   alb   hct
##   <dbl> <dbl> <dbl> <dbl>
## 1     0 0.818  37.1 0.392
## 2     1 0.764  37.6 0.379

Xây dưng mô hình hồi quy logistic phân loại C0 với các biến: tuổi, giới, albumin và hct

m_ps <- glm(c0_pl ~ sex + alb_tb + hct_tb, family = binomial(), data = duy)
summary(m_ps)

## 
## Call:
## glm(formula = c0_pl ~ sex + alb_tb + hct_tb, family = binomial(), 
##     data = duy)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1565   0.3728   0.5666   0.7171   1.2450  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -1.0277     3.8496  -0.267    0.790
## sex          -1.0117     0.8312  -1.217    0.224
## alb_tb        0.1314     0.1003   1.310    0.190
## hct_tb       -4.8573     4.4206  -1.099    0.272
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 75.06  on 74  degrees of freedom
## Residual deviance: 71.27  on 71  degrees of freedom
##   (19 observations deleted due to missingness)
## AIC: 79.27
## 
## Number of Fisher Scoring iterations: 4

Giá trị dự đoán của các bệnh nhân

prs_df <- data.frame(pr_score = predict(m_ps, type = "response"), c0_pl = m_ps$model$c0_pl)
head(prs_df)

##    pr_score c0_pl
## 1 0.8074072     0
## 2 0.8777719     1
## 3 0.8136871     1
## 4 0.7766254     1
## 5 0.8601850     1
## 6 0.8963369     1

Phân bố điểm xác suất (Propensity score) ở 2 nhóm C0 thấp và Cao

labs <- paste("Phan nhom C0", c("Thap", "Cao"))
prs_df %>% mutate(c0_pl = ifelse(c0_pl == 1, labs[1], labs[2])) %>% ggplot(aes(x = pr_score)) + geom_histogram(color = "white") + facet_wrap(~c0_pl) + xlab("Xac suat") + theme_bw()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Ghép cặp

#Loại giá trị trống
ecls_nomiss <- duy %>%  select(rejection, c0_pl, one_of(xvars)) %>%  na.omit()
#Ghép cặp
mod_match <- matchit(c0_pl ~ sex + alb + hct, method = "nearest", data = ecls_nomiss)

## Warning in matchit2nearest(c(`1` = 0, `2` = 1, `3` = 1, `4` = 1, `5` = 1, :
## Fewer control than treated units and matching without replacement. Not all
## treated units will receive a match. Treated units will be matched in the order
## specified by m.order: largest

dta_m <- match.data(mod_match)
dim(dta_m)

## [1] 28  7

So sánh 2 nhóm: 0 - Thấp; 1 - Cao

lapply(xvars, function(v) {t.test(dta_m[, v] ~ dta_m$c0_pl)})

## [[1]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m[, v] by dta_m$c0_pl
## t = 3.6056, df = 24.471, p-value = 0.001387
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2446626 0.8981946
## sample estimates:
## mean in group 0 mean in group 1 
##       0.8571429       0.2857143 
## 
## 
## [[2]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m[, v] by dta_m$c0_pl
## t = -2.5803, df = 23.824, p-value = 0.01647
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.8020653 -0.7550776
## sample estimates:
## mean in group 0 mean in group 1 
##        36.59286        40.37143 
## 
## 
## [[3]]
## 
##  Welch Two Sample t-test
## 
## data:  dta_m[, v] by dta_m$c0_pl
## t = 0.8486, df = 23.336, p-value = 0.4047
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03671533  0.08785819
## sample estimates:
## mean in group 0 mean in group 1 
##       0.3951071       0.3695357

Tạo bảng số BN thải ghép ở nhóm C0 cao và thấp

table(dta_m$rejection, dta_m$c0_pl)

##    
##      0  1
##   0 14 14

Propensity Score Matching

Duy Nguyen

12/30/2019