library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
library(readxl)
library(table1)
##
## Attaching package: 'table1'
##
## The following objects are masked from 'package:base':
##
## units, units<-
# Location of the arrest dataset (CSV) on the local machine
t <- "D:\\Data for practice\\Arrest dataset.csv"
# Read the CSV, treating the first row as column names.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
arr <- read.csv(t, header = TRUE)
# Show the first 6 rows
head(arr)
## id age finance week arrest race work.exp married parole prior educ
## 1 1 27 no 20 1 black no not married yes 3 3
## 2 2 18 no 17 1 black no not married yes 8 4
## 3 3 19 no 25 1 other yes not married yes 13 3
## 4 4 23 yes 52 0 black yes married yes 1 5
## 5 5 19 no 52 0 other yes not married yes 3 3
## 6 6 24 no 52 0 black yes not married no 2 4
## employ1
## 1 no
## 2 no
## 3 no
## 4 no
## 5 no
## 6 no
# Show the last 6 rows
tail(arr, n = 6L)
## id age finance week arrest race work.exp married parole prior educ
## 427 427 22 yes 12 1 black yes married yes 2 4
## 428 428 31 yes 52 0 other yes not married yes 3 3
## 429 429 20 no 52 0 black no not married yes 1 4
## 430 430 20 yes 52 0 black yes married yes 1 3
## 431 431 29 no 52 0 black yes not married yes 3 4
## 432 432 24 yes 52 0 black yes not married yes 1 4
## employ1
## 427 no
## 428 no
## 429 no
## 430 no
## 431 yes
## 432 yes
# Recode the 0/1 arrest indicator into a text label:
# 1 -> "Yes", 0 -> "No"; any other value stays NA
arr$arrest1 <- as.character(
  factor(arr$arrest, levels = c(0, 1), labels = c("No", "Yes"))
)
# Number of rows and number of columns
c(nrow(arr), ncol(arr))
## [1] 432 13
# Summarise every column of the data
summary(object = arr)
## id age finance week
## Min. : 1.0 Min. :17.0 Length:432 Min. : 1.00
## 1st Qu.:108.8 1st Qu.:20.0 Class :character 1st Qu.:50.00
## Median :216.5 Median :23.0 Mode :character Median :52.00
## Mean :216.5 Mean :24.6 Mean :45.85
## 3rd Qu.:324.2 3rd Qu.:27.0 3rd Qu.:52.00
## Max. :432.0 Max. :44.0 Max. :52.00
## arrest race work.exp married
## Min. :0.0000 Length:432 Length:432 Length:432
## 1st Qu.:0.0000 Class :character Class :character Class :character
## Median :0.0000 Mode :character Mode :character Mode :character
## Mean :0.2639
## 3rd Qu.:1.0000
## Max. :1.0000
## parole prior educ employ1
## Length:432 Min. : 0.000 Min. :2.000 Length:432
## Class :character 1st Qu.: 1.000 1st Qu.:3.000 Class :character
## Mode :character Median : 2.000 Median :3.000 Mode :character
## Mean : 2.984 Mean :3.477
## 3rd Qu.: 4.000 3rd Qu.:4.000
## Max. :18.000 Max. :6.000
## arrest1
## Length:432
## Class :character
## Mode :character
##
##
##
# Descriptive analysis: one overall summary table of the selected variables
table1(
  ~ age + finance + arrest + arrest1 + race + parole + educ,
  data = arr
)
| | Overall (N=432) |
|---|---|
| age | |
| Mean (SD) | 24.6 (6.11) |
| Median [Min, Max] | 23.0 [17.0, 44.0] |
| finance | |
| no | 216 (50.0%) |
| yes | 216 (50.0%) |
| arrest | |
| Mean (SD) | 0.264 (0.441) |
| Median [Min, Max] | 0 [0, 1.00] |
| arrest1 | |
| No | 318 (73.6%) |
| Yes | 114 (26.4%) |
| race | |
| black | 379 (87.7%) |
| other | 53 (12.3%) |
| parole | |
| no | 165 (38.2%) |
| yes | 267 (61.8%) |
| educ | |
| Mean (SD) | 3.48 (0.834) |
| Median [Min, Max] | 3.00 [2.00, 6.00] |
# Same descriptive table, stratified by race (one column per group plus Overall).
# NOTE: race is also listed as a row variable, so its rows are 100%/0% by construction.
table1(
  ~ age + finance + arrest + arrest1 + race + parole + educ | race,
  data = arr
)
| | black (N=379) | other (N=53) | Overall (N=432) |
|---|---|---|---|
| age | |||
| Mean (SD) | 24.6 (6.06) | 24.6 (6.53) | 24.6 (6.11) |
| Median [Min, Max] | 23.0 [17.0, 44.0] | 22.0 [17.0, 42.0] | 23.0 [17.0, 44.0] |
| finance | |||
| no | 185 (48.8%) | 31 (58.5%) | 216 (50.0%) |
| yes | 194 (51.2%) | 22 (41.5%) | 216 (50.0%) |
| arrest | |||
| Mean (SD) | 0.269 (0.444) | 0.226 (0.423) | 0.264 (0.441) |
| Median [Min, Max] | 0 [0, 1.00] | 0 [0, 1.00] | 0 [0, 1.00] |
| arrest1 | |||
| No | 277 (73.1%) | 41 (77.4%) | 318 (73.6%) |
| Yes | 102 (26.9%) | 12 (22.6%) | 114 (26.4%) |
| race | |||
| black | 379 (100%) | 0 (0%) | 379 (87.7%) |
| other | 0 (0%) | 53 (100%) | 53 (12.3%) |
| parole | |||
| no | 142 (37.5%) | 23 (43.4%) | 165 (38.2%) |
| yes | 237 (62.5%) | 30 (56.6%) | 267 (61.8%) |
| educ | |||
| Mean (SD) | 3.51 (0.831) | 3.26 (0.836) | 3.48 (0.834) |
| Median [Min, Max] | 3.00 [2.00, 6.00] | 3.00 [2.00, 6.00] | 3.00 [2.00, 6.00] |
# Plot the distribution of `week`
# Base-graphics histogram
hist(arr$week, col = "blue", border = "white", main = "Phân bố tuần bị bắt")
# ggplot2 version of the same histogram.
# The x axis shows `week`, so it is labelled "Tuần" (week) rather than
# repeating the y-axis label "Số đối tượng" (number of subjects).
ggplot(data = arr, aes(x = week)) +
  geom_histogram(fill = "blue", col = "white") +
  labs(title = "Phân bố tuần bị bắt", x = "Tuần", y = "Số đối tượng")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.