# Load the arrest data set into `arr` and print its structure.
# NOTE: the path is specific to one machine; edit the pieces below so they
# point at your own copy of "Arrest dataset.csv".
arrest_csv <- file.path(
  "C:/Users/thien/OneDrive/Desktop/R thuc hanh/R learning",
  "SUMS - R class data",
  "Arrest dataset.csv"
)
arr <- read.csv(arrest_csv)
# Compact overview: one line per column with its type and first values.
str(arr)
## 'data.frame': 432 obs. of 12 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ age : int 27 18 19 23 19 24 25 21 22 20 ...
## $ finance : chr "no" "no" "no" "yes" ...
## $ week : int 20 17 25 52 52 52 23 52 52 52 ...
## $ arrest : int 1 1 1 0 0 0 1 0 0 0 ...
## $ race : chr "black" "black" "other" "black" ...
## $ work.exp: chr "no" "no" "yes" "yes" ...
## $ married : chr "not married" "not married" "not married" "married" ...
## $ parole : chr "yes" "yes" "yes" "yes" ...
## $ prior : int 3 8 13 1 3 2 0 4 6 0 ...
## $ educ : int 3 4 3 5 3 4 4 3 3 5 ...
## $ employ1 : chr "no" "no" "no" "no" ...
# Report the dimensions of the data frame (rows, then columns).
dim(x = arr)
## [1] 432 12
# Checking normality by visual inspection, demonstrated on simulated data.
# Draw 1000 values from a normal distribution (mean 10, sd 3), then round
# them to two decimal places.
x <- rnorm(n = 1000, mean = 10, sd = 3)
x <- round(x, digits = 2)
# Histogram of the simulated values.
hist(x)
# Kernel density estimate of the same values.
plot(density(x))
# Normal Q-Q plot, with its reference line drawn in palette colour 2.
qqnorm(x)
qqline(x, col = 2)
# Normality check on real data: the `age` column of `arr`.
# NOTE(review): the original note reads "statistical test, or regression" --
# presumably normality is being checked ahead of such an analysis; confirm.
# Step 1 - by eye: histogram and normal Q-Q plot of age.
hist(arr$age)
qqnorm(arr$age)
# Add the Q-Q reference line, drawn in palette colour 2.
qqline(arr$age, col = 2)
# Step 2 - formal test: Shapiro-Wilk normality test on age.
shapiro.test(arr$age)
##
## Shapiro-Wilk normality test
##
## data: arr$age
## W = 0.84992, p-value < 2.2e-16
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive statistics for the whole sample (no stratification).
# NOTE(review): `arrest` is stored as a 0/1 integer, so it is summarised as a
# continuous variable (mean/SD). Wrapping it in factor() would presumably
# give counts and percentages instead -- confirm which is intended.
table1(
  ~ age + finance + week + arrest + married,
  data = arr
)
| | Overall (N=432) |
|---|---|
| age | |
| Mean (SD) | 24.6 (6.11) |
| Median [Min, Max] | 23.0 [17.0, 44.0] |
| finance | |
| no | 216 (50.0%) |
| yes | 216 (50.0%) |
| week | |
| Mean (SD) | 45.9 (12.7) |
| Median [Min, Max] | 52.0 [1.00, 52.0] |
| arrest | |
| Mean (SD) | 0.264 (0.441) |
| Median [Min, Max] | 0 [0, 1.00] |
| married | |
| married | 53 (12.3%) |
| not married | 379 (87.7%) |
# Descriptive statistics for age, finance, week, arrest and married,
# stratified by `race` (the variable after `|` defines the columns).
# NOTE(review): `arrest` is a 0/1 integer and is therefore summarised with
# mean/SD; consider factor(arrest) if counts are wanted -- confirm.
table1(
  ~ age + finance + week + arrest + married | race,
  data = arr
)
| | black (N=379) | other (N=53) | Overall (N=432) |
|---|---|---|---|
| age | |||
| Mean (SD) | 24.6 (6.06) | 24.6 (6.53) | 24.6 (6.11) |
| Median [Min, Max] | 23.0 [17.0, 44.0] | 22.0 [17.0, 42.0] | 23.0 [17.0, 44.0] |
| finance | |||
| no | 185 (48.8%) | 31 (58.5%) | 216 (50.0%) |
| yes | 194 (51.2%) | 22 (41.5%) | 216 (50.0%) |
| week | |||
| Mean (SD) | 45.6 (13.0) | 48.0 (9.73) | 45.9 (12.7) |
| Median [Min, Max] | 52.0 [1.00, 52.0] | 52.0 [7.00, 52.0] | 52.0 [1.00, 52.0] |
| arrest | |||
| Mean (SD) | 0.269 (0.444) | 0.226 (0.423) | 0.264 (0.441) |
| Median [Min, Max] | 0 [0, 1.00] | 0 [0, 1.00] | 0 [0, 1.00] |
| married | |||
| married | 44 (11.6%) | 9 (17.0%) | 53 (12.3%) |
| not married | 335 (88.4%) | 44 (83.0%) | 379 (87.7%) |