#install.packages(c("readxl", "tidyverse", "ggplot2", "GGally", "table1", "compareGroups", "DescTools", "simpleboot", "boot", "epiDisplay", "caret", "rms", "pROC", “GGally”, “carData”, “Publish”))
library(readxl)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.8 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(table1)
##
## Attaching package: 'table1'
##
## The following objects are masked from 'package:base':
##
## units, units<-
library(compareGroups)
arr = read.csv("C:\\VN trips\\VN trip 4 (Dec 2022)\\VLU\\Regression analysis\\Datasets\\Arrest data.csv", header = TRUE)
dim(arr)
## [1] 432 11
head(arr)
## id week arrest finance age race work married parole prior educ
## 1 1 20 1 no 27 black no not married yes 3 3
## 2 2 17 1 no 18 black no not married yes 8 4
## 3 3 25 1 no 19 other yes not married yes 13 3
## 4 4 52 0 yes 23 black yes married yes 1 5
## 5 5 52 0 no 19 other yes not married yes 3 3
## 6 6 52 0 no 24 black yes not married no 2 4
tail(arr)
## id week arrest finance age race work married parole prior educ
## 427 427 12 1 yes 22 black yes married yes 2 4
## 428 428 52 0 yes 31 other yes not married yes 3 3
## 429 429 52 0 no 20 black no not married yes 1 4
## 430 430 52 0 yes 20 black yes married yes 1 3
## 431 431 52 0 no 29 black yes not married yes 3 4
## 432 432 52 0 yes 24 black yes not married yes 1 4
arr$arrest1[arr$arrest == 1] = "Yes"
arr$arrest1[arr$arrest == 0] = "No"
arr$fin[arr$finance == "yes"] = 1
arr$fin[arr$finance == "no"] = 0
summary(arr)
## id week arrest finance
## Min. : 1.0 Min. : 1.00 Min. :0.0000 Length:432
## 1st Qu.:108.8 1st Qu.:50.00 1st Qu.:0.0000 Class :character
## Median :216.5 Median :52.00 Median :0.0000 Mode :character
## Mean :216.5 Mean :45.85 Mean :0.2639
## 3rd Qu.:324.2 3rd Qu.:52.00 3rd Qu.:1.0000
## Max. :432.0 Max. :52.00 Max. :1.0000
## age race work married
## Min. :17.0 Length:432 Length:432 Length:432
## 1st Qu.:20.0 Class :character Class :character Class :character
## Median :23.0 Mode :character Mode :character Mode :character
## Mean :24.6
## 3rd Qu.:27.0
## Max. :44.0
## parole prior educ arrest1
## Length:432 Min. : 0.000 Min. :2.000 Length:432
## Class :character 1st Qu.: 1.000 1st Qu.:3.000 Class :character
## Mode :character Median : 2.000 Median :3.000 Mode :character
## Mean : 2.984 Mean :3.477
## 3rd Qu.: 4.000 3rd Qu.:4.000
## Max. :18.000 Max. :6.000
## fin
## Length:432
## Class :character
## Mode :character
##
##
##
table1(~ age + finance + fin + arrest + arrest1 + race + parole + educ, data=arr)
Overall (N=432) |
|
---|---|
age | |
Mean (SD) | 24.6 (6.11) |
Median [Min, Max] | 23.0 [17.0, 44.0] |
finance | |
no | 216 (50.0%) |
yes | 216 (50.0%) |
fin | |
0 | 216 (50.0%) |
1 | 216 (50.0%) |
arrest | |
Mean (SD) | 0.264 (0.441) |
Median [Min, Max] | 0 [0, 1.00] |
arrest1 | |
No | 318 (73.6%) |
Yes | 114 (26.4%) |
race | |
black | 379 (87.7%) |
other | 53 (12.3%) |
parole | |
no | 165 (38.2%) |
yes | 267 (61.8%) |
educ | |
Mean (SD) | 3.48 (0.834) |
Median [Min, Max] | 3.00 [2.00, 6.00] |
table1(~ age + arrest + arrest1 + race + parole + educ | finance, data=arr)
no (N=216) |
yes (N=216) |
Overall (N=432) |
|
---|---|---|---|
age | |||
Mean (SD) | 24.2 (5.73) | 25.0 (6.47) | 24.6 (6.11) |
Median [Min, Max] | 23.0 [17.0, 44.0] | 23.0 [17.0, 44.0] | 23.0 [17.0, 44.0] |
arrest | |||
Mean (SD) | 0.306 (0.462) | 0.222 (0.417) | 0.264 (0.441) |
Median [Min, Max] | 0 [0, 1.00] | 0 [0, 1.00] | 0 [0, 1.00] |
arrest1 | |||
No | 150 (69.4%) | 168 (77.8%) | 318 (73.6%) |
Yes | 66 (30.6%) | 48 (22.2%) | 114 (26.4%) |
race | |||
black | 185 (85.6%) | 194 (89.8%) | 379 (87.7%) |
other | 31 (14.4%) | 22 (10.2%) | 53 (12.3%) |
parole | |||
no | 81 (37.5%) | 84 (38.9%) | 165 (38.2%) |
yes | 135 (62.5%) | 132 (61.1%) | 267 (61.8%) |
educ | |||
Mean (SD) | 3.44 (0.844) | 3.52 (0.824) | 3.48 (0.834) |
Median [Min, Max] | 3.00 [2.00, 6.00] | 3.00 [2.00, 6.00] | 3.00 [2.00, 6.00] |
# Cách 1:
t = compareGroups(finance ~ age + race + prior + educ + prior + work + married + parole + arrest, data = arr)
createTable(t)
##
## --------Summary descriptives table by 'finance'---------
##
## _________________________________________________
## no yes p.overall
## N=216 N=216
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 24.2 (5.73) 25.0 (6.47) 0.203
## race: 0.241
## black 185 (85.6%) 194 (89.8%)
## other 31 (14.4%) 22 (10.2%)
## prior 2.99 (2.92) 2.98 (2.88) 0.987
## educ 3.44 (0.84) 3.52 (0.82) 0.300
## prior 2.99 (2.92) 2.98 (2.88) 0.987
## work: 1.000
## no 93 (43.1%) 92 (42.6%)
## yes 123 (56.9%) 124 (57.4%)
## married: 0.557
## married 29 (13.4%) 24 (11.1%)
## not married 187 (86.6%) 192 (88.9%)
## parole: 0.843
## no 81 (37.5%) 84 (38.9%)
## yes 135 (62.5%) 132 (61.1%)
## arrest 0.31 (0.46) 0.22 (0.42) 0.050
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Cách 2:
createTable(compareGroups(finance ~ age + race + prior + educ + prior + work + married + parole + arrest, data = arr))
##
## --------Summary descriptives table by 'finance'---------
##
## _________________________________________________
## no yes p.overall
## N=216 N=216
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 24.2 (5.73) 25.0 (6.47) 0.203
## race: 0.241
## black 185 (85.6%) 194 (89.8%)
## other 31 (14.4%) 22 (10.2%)
## prior 2.99 (2.92) 2.98 (2.88) 0.987
## educ 3.44 (0.84) 3.52 (0.82) 0.300
## prior 2.99 (2.92) 2.98 (2.88) 0.987
## work: 1.000
## no 93 (43.1%) 92 (42.6%)
## yes 123 (56.9%) 124 (57.4%)
## married: 0.557
## married 29 (13.4%) 24 (11.1%)
## not married 187 (86.6%) 192 (88.9%)
## parole: 0.843
## no 81 (37.5%) 84 (38.9%)
## yes 135 (62.5%) 132 (61.1%)
## arrest 0.31 (0.46) 0.22 (0.42) 0.050
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Nếu biến số prior không theo phân bố chuẩn (normal distribution)
createTable(compareGroups(finance ~ age + race + prior + educ + prior + work + married + parole + arrest, data = arr, method = c(prior = 2)))
##
## --------Summary descriptives table by 'finance'---------
##
## ___________________________________________________________
## no yes p.overall
## N=216 N=216
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 24.2 (5.73) 25.0 (6.47) 0.203
## race: 0.241
## black 185 (85.6%) 194 (89.8%)
## other 31 (14.4%) 22 (10.2%)
## prior 2.00 [1.00;4.00] 2.00 [1.00;4.00] 0.773
## educ 3.44 (0.84) 3.52 (0.82) 0.300
## prior 2.99 (2.92) 2.98 (2.88) 0.987
## work: 1.000
## no 93 (43.1%) 92 (42.6%)
## yes 123 (56.9%) 124 (57.4%)
## married: 0.557
## married 29 (13.4%) 24 (11.1%)
## not married 187 (86.6%) 192 (88.9%)
## parole: 0.843
## no 81 (37.5%) 84 (38.9%)
## yes 135 (62.5%) 132 (61.1%)
## arrest 0.31 (0.46) 0.22 (0.42) 0.050
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
t.test(arr$prior ~ arr$arrest)
##
## Welch Two Sample t-test
##
## data: arr$prior by arr$arrest
## t = -2.9319, df = 155.9, p-value = 0.003877
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -1.7920133 -0.3493306
## sample estimates:
## mean in group 0 mean in group 1
## 2.701258 3.771930
hist(arr$prior)
library(simpleboot)
## Simple Bootstrap Routines (1.1-7)
library(tidyverse)
arrest = arr %>% filter(arrest == 1)
non.arrest = arr %>% filter(arrest == 0)
set.seed(1234)
b.means = two.boot(arrest$prior, non.arrest$prior, mean, R = 1000)
hist (b.means$t, breaks = 20)
quantile(b.means$t, probs=c(0.025, 0.50, 0.975))
## 2.5% 50% 97.5%
## 0.3638572 1.0390599 1.8544729