t = "C:\\Users\\Admin\\Desktop\\rejection.csv"
library(MatchIt)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gmodels)
duy = read.csv(t)
duy$c0_pl = ifelse(duy$c0 <= 5 , 0, 1)
attach(duy)
head(duy)
## id sex age hla relationship time c0 creatinin alb hct hb
## 1 1 1 37 6-Jun Deceased 90 3.2 453 31.4 0.1700 92.0
## 2 2 1 37 2-Jun Living related 90 7.2 317 37.2 0.2250 81.0
## 3 5 1 44 5-Jun Living related 90 16.4 546 33.2 0.2700 96.0
## 4 6 1 41 3-Jun Living unrelated 90 10.8 438 35.5 0.2746 112.0
## 5 7 1 36 3-Jun Living unrelated 90 6.3 205 41.3 0.2800 81.0
## 6 9 1 33 3-Jun Living related 90 8.1 396 38.5 0.2900 97.5
## rejection c0_tb creatinin_tb alb_tb hct_tb hb_tb c0_pl
## 1 0 3.720000 201.4000 33.52000 0.1918000 93.55000 0
## 2 0 4.916667 186.8333 38.88333 0.2260667 103.26667 1
## 3 0 10.870000 279.3000 36.72222 0.2700000 97.55556 1
## 4 0 8.530000 171.7000 35.17000 0.2749600 110.40000 1
## 5 0 3.940000 107.2000 39.70000 0.2800000 97.80000 1
## 6 0 9.766667 172.5000 42.66000 0.2900000 114.91667 1
các biến đưa vào mô hình đồng nhất
xvars <- c("sex", "alb", "hct")
Loại các giá trị trống
duy %>% group_by(c0_pl) %>% select(one_of(xvars)) %>% summarise_all(funs(mean(., na.rm = T)))
## Adding missing grouping variables: `c0_pl`
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
## # A tibble: 2 x 4
## c0_pl sex alb hct
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0.818 37.1 0.392
## 2 1 0.764 37.6 0.379
Xây dưng mô hình hồi quy logistic phân loại C0 với các biến: tuổi, giới, albumin và hct
m_ps <- glm(c0_pl ~ sex + alb_tb + hct_tb, family = binomial(), data = duy)
summary(m_ps)
##
## Call:
## glm(formula = c0_pl ~ sex + alb_tb + hct_tb, family = binomial(),
## data = duy)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1565 0.3728 0.5666 0.7171 1.2450
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.0277 3.8496 -0.267 0.790
## sex -1.0117 0.8312 -1.217 0.224
## alb_tb 0.1314 0.1003 1.310 0.190
## hct_tb -4.8573 4.4206 -1.099 0.272
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 75.06 on 74 degrees of freedom
## Residual deviance: 71.27 on 71 degrees of freedom
## (19 observations deleted due to missingness)
## AIC: 79.27
##
## Number of Fisher Scoring iterations: 4
Giá trị dự đoán của các bệnh nhân
prs_df <- data.frame(pr_score = predict(m_ps, type = "response"), c0_pl = m_ps$model$c0_pl)
head(prs_df)
## pr_score c0_pl
## 1 0.8074072 0
## 2 0.8777719 1
## 3 0.8136871 1
## 4 0.7766254 1
## 5 0.8601850 1
## 6 0.8963369 1
Phân bố điểm xác suất (Propensity score) ở 2 nhóm C0 thấp và Cao
labs <- paste("Phan nhom C0", c("Thap", "Cao"))
prs_df %>% mutate(c0_pl = ifelse(c0_pl == 1, labs[1], labs[2])) %>% ggplot(aes(x = pr_score)) + geom_histogram(color = "white") + facet_wrap(~c0_pl) + xlab("Xac suat") + theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Ghép cặp
#Loại giá trị trống
ecls_nomiss <- duy %>% select(rejection, c0_pl, one_of(xvars)) %>% na.omit()
#Ghép cặp
mod_match <- matchit(c0_pl ~ sex + alb + hct, method = "nearest", data = ecls_nomiss)
## Warning in matchit2nearest(c(`1` = 0, `2` = 1, `3` = 1, `4` = 1, `5` = 1, :
## Fewer control than treated units and matching without replacement. Not all
## treated units will receive a match. Treated units will be matched in the order
## specified by m.order: largest
dta_m <- match.data(mod_match)
dim(dta_m)
## [1] 28 7
So sánh 2 nhóm: 0 - Thấp; 1 - Cao
lapply(xvars, function(v) {t.test(dta_m[, v] ~ dta_m$c0_pl)})
## [[1]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$c0_pl
## t = 3.6056, df = 24.471, p-value = 0.001387
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2446626 0.8981946
## sample estimates:
## mean in group 0 mean in group 1
## 0.8571429 0.2857143
##
##
## [[2]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$c0_pl
## t = -2.5803, df = 23.824, p-value = 0.01647
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.8020653 -0.7550776
## sample estimates:
## mean in group 0 mean in group 1
## 36.59286 40.37143
##
##
## [[3]]
##
## Welch Two Sample t-test
##
## data: dta_m[, v] by dta_m$c0_pl
## t = 0.8486, df = 23.336, p-value = 0.4047
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03671533 0.08785819
## sample estimates:
## mean in group 0 mean in group 1
## 0.3951071 0.3695357
Tạo bảng số BN thải ghép ở nhóm C0 cao và thấp
table(dta_m$rejection, dta_m$c0_pl)
##
## 0 1
## 0 14 14