Loading libraries and reading data

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(ggplot2)

# Location of the arrest dataset (CSV).
# NOTE(review): this is an absolute, machine-specific path; adjust it (or
# switch to a project-relative path) before running elsewhere.
# NOTE: `t` is also the name of base R's transpose function; the variable
# name is kept here so that any later reference to `t` still resolves.
t <- "/Users/tuanvnguyen/Desktop/BDA2022/Datasets/Arrest dataset.csv"

# Read the CSV into a data frame and preview its first rows.
df <- read.csv(t)
head(df)
##   id age finance week arrest  race work.exp     married parole prior educ
## 1  1  27      no   20      1 black       no not married    yes     3    3
## 2  2  18      no   17      1 black       no not married    yes     8    4
## 3  3  19      no   25      1 other      yes not married    yes    13    3
## 4  4  23     yes   52      0 black      yes     married    yes     1    5
## 5  5  19      no   52      0 other      yes not married    yes     3    3
## 6  6  24      no   52      0 black      yes not married     no     2    4
##   employ1
## 1      no
## 2      no
## 3      no
## 4      no
## 5      no
## 6      no

Descriptive analysis

# Descriptive table: arrest (treated as categorical) and age,
# split by finance group, plus an overall column.
table1(~ factor(arrest) + age | finance, data = df)
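
|                    | no (N=216)        | yes (N=216)       | Overall (N=432)   |
|--------------------|-------------------|-------------------|-------------------|
| **factor(arrest)** |                   |                   |                   |
| 0                  | 150 (69.4%)       | 168 (77.8%)       | 318 (73.6%)       |
| 1                  | 66 (30.6%)        | 48 (22.2%)        | 114 (26.4%)       |
| **age**            |                   |                   |                   |
| Mean (SD)          | 24.2 (5.73)       | 25.0 (6.47)       | 24.6 (6.11)       |
| Median [Min, Max]  | 23.0 [17.0, 44.0] | 23.0 [17.0, 44.0] | 23.0 [17.0, 44.0] |
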
# Histogram of age: blue bars outlined in white. The number of bins is
# left at the stat_bin() default.
ggplot(df, aes(x = age)) +
  geom_histogram(fill = "blue", col = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Logistic regression

# Fit a binomial GLM (logistic regression) of arrest on finance,
# then print the coefficient table and deviance summary.
m1 <- glm(formula = arrest ~ finance, family = binomial, data = df)
summary(m1)
## 
## Call:
## glm(formula = arrest ~ finance, family = binomial, data = df)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -0.854  -0.854  -0.709   1.540   1.734  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.8210     0.1477  -5.558 2.73e-08 ***
## financeyes   -0.4318     0.2205  -1.959   0.0502 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 498.60  on 431  degrees of freedom
## Residual deviance: 494.73  on 430  degrees of freedom
## AIC: 498.73
## 
## Number of Fisher Scoring iterations: 4
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
## 
## Attaching package: 'epiDisplay'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
# Present the fitted model m1 as odds ratios with 95% confidence intervals
# and Wald / likelihood-ratio test p-values.
# NOTE(review): the printed table labels finance as "(cont. var.)" although
# it holds yes/no values; presumably because the column is character rather
# than factor. Confirm, and consider converting it to a factor upstream.
epiDisplay::logistic.display(m1)
## 
## Logistic regression predicting arrest 
##  
##                      OR(95%CI)      P(Wald's test) P(LR-test)
## finance (cont. var.) 0.65 (0.42,1)  0.05           0.049     
##                                                              
## Log-likelihood = -247.3642
## No. of observations = 432
## AIC value = 498.7283