# Load the arrest data set from its CSV file into a data frame
arrest_csv_path <- "D:\\Analysis\\Arrest data for cox model.csv"
arr <- read.csv(file = arrest_csv_path)
# Dimensions of the data frame: number of rows, then number of columns
dim(arr)
## [1] 432 11
# First six rows of the data (the default `n` of head(), written out)
head(arr, n = 6L)
## id week arrest finance age race work married parole prior educ
## 1 1 20 1 no 27 black no not married yes 3 3
## 2 2 17 1 no 18 black no not married yes 8 4
## 3 3 25 1 no 19 other yes not married yes 13 3
## 4 4 52 0 yes 23 black yes married yes 1 5
## 5 5 52 0 no 19 other yes not married yes 3 3
## 6 6 52 0 no 24 black yes not married no 2 4
# Last six rows of the data (the default `n` of tail(), written out)
tail(arr, n = 6L)
## id week arrest finance age race work married parole prior educ
## 427 427 12 1 yes 22 black yes married yes 2 4
## 428 428 52 0 yes 31 other yes not married yes 3 3
## 429 429 52 0 no 20 black no not married yes 1 4
## 430 430 52 0 yes 20 black yes married yes 1 3
## 431 431 52 0 no 29 black yes not married yes 3 4
## 432 432 52 0 yes 24 black yes not married yes 1 4
# Text version of the 0/1 `arrest` indicator: 0 -> "No", 1 -> "Yes";
# any other value (including NA) is left as NA
arr$arrest1 <- c("No", "Yes")[match(arr$arrest, c(0, 1))]
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive table of age, arrest status, race, parole and education level,
# with one column per `finance` group
table1(
  ~ age + arrest1 + race + parole + factor(educ) | finance,
  data = arr
)
| | no (N=216) | yes (N=216) | Overall (N=432) |
|---|---|---|---|
| age | |||
| Mean (SD) | 24.2 (5.73) | 25.0 (6.47) | 24.6 (6.11) |
| Median [Min, Max] | 23.0 [17.0, 44.0] | 23.0 [17.0, 44.0] | 23.0 [17.0, 44.0] |
| arrest1 | |||
| No | 150 (69.4%) | 168 (77.8%) | 318 (73.6%) |
| Yes | 66 (30.6%) | 48 (22.2%) | 114 (26.4%) |
| race | |||
| black | 185 (85.6%) | 194 (89.8%) | 379 (87.7%) |
| other | 31 (14.4%) | 22 (10.2%) | 53 (12.3%) |
| parole | |||
| no | 81 (37.5%) | 84 (38.9%) | 165 (38.2%) |
| yes | 135 (62.5%) | 132 (61.1%) | 267 (61.8%) |
| factor(educ) | |||
| 2 | 17 (7.9%) | 7 (3.2%) | 24 (5.6%) |
| 3 | 117 (54.2%) | 122 (56.5%) | 239 (55.3%) |
| 4 | 57 (26.4%) | 62 (28.7%) | 119 (27.5%) |
| 5 | 21 (9.7%) | 18 (8.3%) | 39 (9.0%) |
| 6 | 4 (1.9%) | 7 (3.2%) | 11 (2.5%) |
library(compareGroups)
# Compare age, race, prior and parole between the `finance` groups.
# NOTE(review): `t` is also the name of base R's transpose function; the
# variable name is kept in case later code refers to this object.
t <- compareGroups(
  finance ~ age + race + prior + parole,
  data = arr
)
# Print the resulting descriptive table
createTable(t)
##
## --------Summary descriptives table by 'finance'---------
##
## ___________________________________________
## no yes p.overall
## N=216 N=216
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## age 24.2 (5.73) 25.0 (6.47) 0.203
## race: 0.241
## black 185 (85.6%) 194 (89.8%)
## other 31 (14.4%) 22 (10.2%)
## prior 2.99 (2.92) 2.98 (2.88) 0.987
## parole: 0.843
## no 81 (37.5%) 84 (38.9%)
## yes 135 (62.5%) 132 (61.1%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Base-graphics histogram of the `week` column: blue bars, white borders
hist(
  arr$week,
  col = "blue",
  border = "white",
  main = "Phân bố tuần bị bắt",
  xlab = "Tuần",
  ylab = "Số đối tượng"
)
# Histogram of `week` drawn with ggplot2. No bin settings are given, so
# stat_bin() falls back to its default number of bins.
library(ggplot2)
ggplot(arr, aes(x = week)) +
  geom_histogram(fill = "blue", colour = "white") +
  labs(
    title = "Phân bố tuần bị bắt",
    x = "Số tuần",
    y = "Số đối tượng"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.