Đọc dữ liệu nghiên cứu Arrest dataset.csv và gọi đối
tượng là arr.
arr= read.csv ("D:\\OneDrive\\Statistical courses\\Can Tho University of Medicine_Sep2022\\Data for practice\\Arrest dataset.csv")
names (arr)
## [1] "id" "age" "finance" "week" "arrest" "race"
## [7] "work.exp" "married" "parole" "prior" "educ" "employ1"
Dùng table1::table1 để có cái nhìn chung về nghiên cứu
này:
table1::table1 (~ age + finance + race + work.exp + married + parole + prior + factor(educ)|arrest, data= arr)
## Warning in table1.formula(~age + finance + race + work.exp + married + parole
## + : Terms to the right of '|' in formula 'x' define table columns and are
## expected to be factors with meaningful labels.
| 0 (N=318) |
1 (N=114) |
Overall (N=432) |
|
|---|---|---|---|
| age | |||
| Mean (SD) | 25.3 (6.31) | 22.8 (5.12) | 24.6 (6.11) |
| Median [Min, Max] | 23.0 [17.0, 44.0] | 21.0 [17.0, 44.0] | 23.0 [17.0, 44.0] |
| finance | |||
| no | 150 (47.2%) | 66 (57.9%) | 216 (50.0%) |
| yes | 168 (52.8%) | 48 (42.1%) | 216 (50.0%) |
| race | |||
| black | 277 (87.1%) | 102 (89.5%) | 379 (87.7%) |
| other | 41 (12.9%) | 12 (10.5%) | 53 (12.3%) |
| work.exp | |||
| no | 123 (38.7%) | 62 (54.4%) | 185 (42.8%) |
| yes | 195 (61.3%) | 52 (45.6%) | 247 (57.2%) |
| married | |||
| married | 45 (14.2%) | 8 (7.0%) | 53 (12.3%) |
| not married | 273 (85.8%) | 106 (93.0%) | 379 (87.7%) |
| parole | |||
| no | 119 (37.4%) | 46 (40.4%) | 165 (38.2%) |
| yes | 199 (62.6%) | 68 (59.6%) | 267 (61.8%) |
| prior | |||
| Mean (SD) | 2.70 (2.55) | 3.77 (3.59) | 2.98 (2.90) |
| Median [Min, Max] | 2.00 [0, 15.0] | 3.00 [0, 18.0] | 2.00 [0, 18.0] |
| factor(educ) | |||
| 2 | 20 (6.3%) | 4 (3.5%) | 24 (5.6%) |
| 3 | 162 (50.9%) | 77 (67.5%) | 239 (55.3%) |
| 4 | 92 (28.9%) | 27 (23.7%) | 119 (27.5%) |
| 5 | 34 (10.7%) | 5 (4.4%) | 39 (9.0%) |
| 6 | 10 (3.1%) | 1 (0.9%) | 11 (2.5%) |
Có bao nhiêu người bị bắt trở lại trong thời gian nghiên cứu? Tình
hình bị bắt trở lại có khác biệt giữa các race hay
không?
#Số người bị bắt trở lại trong thời gian nghiên cứu
table (arr$arrest) ; prop.table(table (arr$arrest))
##
## 0 1
## 318 114
##
## 0 1
## 0.7361111 0.2638889
# Tình hình bị bắt trở lại có khác biệt giữa các race hay không?
DescTools::Desc(arr$race ~ arr$arrest)
## ------------------------------------------------------------------------------
## arr$race ~ arr$arrest
##
## Summary:
## n: 432, rows: 2, columns: 2
##
## Pearson's Chi-squared test (cont. adj):
## X-squared = 0.24452, df = 1, p-value = 0.621
## Fisher's exact test p-value = 0.6183
## McNemar's chi-squared = 25.175, df = 1, p-value = 5.236e-07
##
## estimate lwr.ci upr.ci'
##
## odds ratio 0.795 0.402 1.572
## rel. risk (col1) 0.945 0.807 1.106
## rel. risk (col2) 1.189 0.703 2.008
##
##
## Phi-Coefficient 0.032
## Contingency Coeff. 0.032
## Cramer's V 0.032
##
##
## arr$arrest 0 1 Sum
## arr$race
##
## black freq 277 102 379
## perc 64.1% 23.6% 87.7%
## p.row 73.1% 26.9% .
## p.col 87.1% 89.5% .
##
## other freq 41 12 53
## perc 9.5% 2.8% 12.3%
## p.row 77.4% 22.6% .
## p.col 12.9% 10.5% .
##
## Sum freq 318 114 432
## perc 73.6% 26.4% 100.0%
## p.row . . .
## p.col . . .
##
##
## ----------
## ' 95% conf. level
library(survival)
s <- survfit (Surv (week, arrest) ~ race, data= arr)
plot(s)
library(survminer)
## Warning: package 'survminer' was built under R version 4.1.3
## Loading required package: ggplot2
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.1.3
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
ggsurvplot(s)
ggsurvplot(s, conf.int= TRUE, pval=TRUE, risk.table=TRUE, legend.labs= c("Black", "Other"), legend.title= "Race", palette= c("orchid2", "dodgerblue2"), title= "Kaplan-Meier Curve", risk.table.height= .3, ylab= "Cumulative probability", xlab= "Time to arrest")
library (survival)
s2 <- coxph (Surv (week, arrest) ~ factor (race), data= arr)
summary (s2)
## Call:
## coxph(formula = Surv(week, arrest) ~ factor(race), data = arr)
##
## n= 432, number of events= 114
##
## coef exp(coef) se(coef) z Pr(>|z|)
## factor(race)other -0.2308 0.7939 0.3052 -0.756 0.449
##
## exp(coef) exp(-coef) lower .95 upper .95
## factor(race)other 0.7939 1.26 0.4365 1.444
##
## Concordance= 0.514 (se = 0.014 )
## Likelihood ratio test= 0.61 on 1 df, p=0.4
## Wald test = 0.57 on 1 df, p=0.4
## Score (logrank) test = 0.57 on 1 df, p=0.4
Hỗ trợ tài chánh và tình trạng hôn nhân có ảnh hưởng đến nguy cơ bị bắt trở lại không?
arr$race = as.factor (arr$race)
s3 <- coxph (Surv (week, arrest) ~ race + finance + married, data= arr)
summary (s3)
## Call:
## coxph(formula = Surv(week, arrest) ~ race + finance + married,
## data = arr)
##
## n= 432, number of events= 114
##
## coef exp(coef) se(coef) z Pr(>|z|)
## raceother -0.2396 0.7869 0.3060 -0.783 0.4336
## financeyes -0.3965 0.6727 0.1902 -2.085 0.0371 *
## marriednot married 0.7260 2.0668 0.3671 1.977 0.0480 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## raceother 0.7869 1.2707 0.4320 1.4335
## financeyes 0.6727 1.4866 0.4634 0.9765
## marriednot married 2.0668 0.4838 1.0064 4.2444
##
## Concordance= 0.578 (se = 0.024 )
## Likelihood ratio test= 9.5 on 3 df, p=0.02
## Wald test = 8.56 on 3 df, p=0.04
## Score (logrank) test = 8.77 on 3 df, p=0.03
Vẽ biểu đồ forest plot
ggforest (s3)
## Warning in .get_data(model, data = data): The `data` argument is not provided.
## Data will be extracted from model fit.
Kiểm tra giả định
test = cox.zph(s3)
ggcoxzph(test)
t = survfit(s3, newdata= data.frame (finance = 'yes', race = 'black', married = 'married'))
plot (t)