library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(ggplot2)
# Location of the arrest data set (CSV).
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
t <- "/Users/tuanvnguyen/Desktop/BDA2022/Datasets/Arrest dataset.csv"
# Read text columns (finance, race, married, ...) as factors. Since R 4.0,
# read.csv() keeps them as character by default, which leads
# logistic.display() to label the binary `finance` as "(cont. var.)".
df <- read.csv(t, stringsAsFactors = TRUE)
# Preview the first rows to check the import.
head(df)
## id age finance week arrest race work.exp married parole prior educ
## 1 1 27 no 20 1 black no not married yes 3 3
## 2 2 18 no 17 1 black no not married yes 8 4
## 3 3 19 no 25 1 other yes not married yes 13 3
## 4 4 23 yes 52 0 black yes married yes 1 5
## 5 5 19 no 52 0 other yes not married yes 3 3
## 6 6 24 no 52 0 black yes not married no 2 4
## employ1
## 1 no
## 2 no
## 3 no
## 4 no
## 5 no
## 6 no
# Descriptive table stratified by `finance`: arrest (treated as categorical)
# and age, plus an overall column.
table1(~ factor(arrest) + age | finance, data = df)
| | no (N=216) | yes (N=216) | Overall (N=432) |
|---|---|---|---|
| factor(arrest) | |||
| 0 | 150 (69.4%) | 168 (77.8%) | 318 (73.6%) |
| 1 | 66 (30.6%) | 48 (22.2%) | 114 (26.4%) |
| age | |||
| Mean (SD) | 24.2 (5.73) | 25.0 (6.47) | 24.6 (6.11) |
| Median [Min, Max] | 23.0 [17.0, 44.0] | 23.0 [17.0, 44.0] | 23.0 [17.0, 44.0] |
# Distribution of age. Ages appear to be whole years, so use one bin per year
# rather than the default 30 bins (the default also triggers the stat_bin()
# "Pick better value with `binwidth`" message).
ggplot(data = df, aes(x = age)) +
  geom_histogram(binwidth = 1, fill = "blue", colour = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Logistic regression of arrest (0/1) on finance; coefficients are on the
# log-odds scale.
m1 <- glm(arrest ~ finance, family = binomial, data = df)
# Coefficient table, deviances and AIC for the fitted model.
summary(m1)
##
## Call:
## glm(formula = arrest ~ finance, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.854 -0.854 -0.709 1.540 1.734
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8210 0.1477 -5.558 2.73e-08 ***
## financeyes -0.4318 0.2205 -1.959 0.0502 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 498.60 on 431 degrees of freedom
## Residual deviance: 494.73 on 430 degrees of freedom
## AIC: 498.73
##
## Number of Fisher Scoring iterations: 4
library(epiDisplay)
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
##
## Attaching package: 'epiDisplay'
## The following object is masked from 'package:ggplot2':
##
## alpha
# Report the fitted model as odds ratios with 95% CIs, plus Wald and
# likelihood-ratio p-values.
epiDisplay::logistic.display(m1)
##
## Logistic regression predicting arrest
##
## OR(95%CI) P(Wald's test) P(LR-test)
## finance (cont. var.) 0.65 (0.42,1) 0.05 0.049
##
## Log-likelihood = -247.3642
## No. of observations = 432
## AIC value = 498.7283