This file documents the code for the day 1 exercise, written in R Markdown.

# Reading data into R
# NOTE(review): absolute, machine-specific Windows path -- adjust it to wherever
# the CSV is stored before running this on another computer.
# The path is kept in `data_path` rather than `t` so that the workspace does not
# hold a variable masking base::t() (matrix transpose).
data_path <- "D:\\Analysis\\Arrest data for cox model.csv"
arr <- read.csv(data_path)

# Size of the data set: number of rows first, then number of columns
c(nrow(arr), ncol(arr))

## [1] 432  11

First 6 rows

# Print the first six rows of the data set
head(arr, n = 6L)

##   id week arrest finance age  race work     married parole prior educ
## 1  1   20      1      no  27 black   no not married    yes     3    3
## 2  2   17      1      no  18 black   no not married    yes     8    4
## 3  3   25      1      no  19 other  yes not married    yes    13    3
## 4  4   52      0     yes  23 black  yes     married    yes     1    5
## 5  5   52      0      no  19 other  yes not married    yes     3    3
## 6  6   52      0      no  24 black  yes not married     no     2    4

Last 6 rows

# Print the last six rows of the data set
tail(arr, n = 6L)

##      id week arrest finance age  race work     married parole prior educ
## 427 427   12      1     yes  22 black  yes     married    yes     2    4
## 428 428   52      0     yes  31 other  yes not married    yes     3    3
## 429 429   52      0      no  20 black   no not married    yes     1    4
## 430 430   52      0     yes  20 black  yes     married    yes     1    3
## 431 431   52      0      no  29 black  yes not married    yes     3    4
## 432 432   52      0     yes  24 black  yes not married    yes     1    4

# Text label for the arrest indicator: 1 -> "Yes", 0 -> "No".
# Any other value (including NA) has no match and is left as NA.
arr$arrest1 <- c("No", "Yes")[match(arr$arrest, c(0, 1))]

Descriptive analysis

library(table1)

## 
## Attaching package: 'table1'

## The following objects are masked from 'package:base':
## 
##     units, units<-

# Descriptive table of the listed variables, with one column per level of
# `finance` (the variable after `|`) plus an overall column
table1(
  ~ age + arrest1 + race + parole + factor(educ) | finance,
  data = arr
)

	no (N=216)	yes (N=216)	Overall (N=432)
age
Mean (SD)	24.2 (5.73)	25.0 (6.47)	24.6 (6.11)
Median [Min, Max]	23.0 [17.0, 44.0]	23.0 [17.0, 44.0]	23.0 [17.0, 44.0]
arrest1
No	150 (69.4%)	168 (77.8%)	318 (73.6%)
Yes	66 (30.6%)	48 (22.2%)	114 (26.4%)
race
black	185 (85.6%)	194 (89.8%)	379 (87.7%)
other	31 (14.4%)	22 (10.2%)	53 (12.3%)
parole
no	81 (37.5%)	84 (38.9%)	165 (38.2%)
yes	135 (62.5%)	132 (61.1%)	267 (61.8%)
factor(educ)
2	17 (7.9%)	7 (3.2%)	24 (5.6%)
3	117 (54.2%)	122 (56.5%)	239 (55.3%)
4	57 (26.4%)	62 (28.7%)	119 (27.5%)
5	21 (9.7%)	18 (8.3%)	39 (9.0%)
6	4 (1.9%)	7 (3.2%)	11 (2.5%)

Descriptive analysis using compareGroups

library(compareGroups)
# Compare age, race, prior and parole between the levels of `finance`.
# The result is stored under a descriptive name instead of `t`, so the
# workspace does not hold a variable that masks base::t() (matrix transpose).
finance_comparison <- compareGroups(
  finance ~ age + race + prior + parole,
  data = arr
)
# Render the comparison as a summary table (printed to the console)
createTable(finance_comparison)

## 
## --------Summary descriptives table by 'finance'---------
## 
## ___________________________________________ 
##               no          yes     p.overall 
##              N=216       N=216              
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## age       24.2 (5.73) 25.0 (6.47)   0.203   
## race:                               0.241   
##     black 185 (85.6%) 194 (89.8%)           
##     other 31 (14.4%)  22 (10.2%)            
## prior     2.99 (2.92) 2.98 (2.88)   0.987   
## parole:                             0.843   
##     no    81 (37.5%)  84 (38.9%)            
##     yes   135 (62.5%) 132 (61.1%)           
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

Simple data visualization

# Base-graphics histogram of the `week` column: blue bars with white outlines
hist(
  x = arr$week,
  col = "blue",
  border = "white",
  main = "Phân bố tuần bị bắt",
  xlab = "Tuần",
  ylab = "Số đối tượng"
)

# Using ggplot2

library(ggplot2)
# Histogram of `week`, drawn with blue bars and white outlines.
# bins = 30 is the number of bins stat_bin() falls back to when none is given;
# stating it explicitly keeps the same histogram while avoiding the
# "Pick better value with `binwidth`" message.
ggplot(data = arr, mapping = aes(x = week)) +
  geom_histogram(bins = 30, fill = "blue", colour = "white") +
  labs(title = "Phân bố tuần bị bắt", x = "Số tuần", y = "Số đối tượng")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Arrest

hanndao

2025-07-11

This file documents the code for the day 1 exercise, written in R Markdown.

First 6 rows

Last 6 rows

Descriptive analysis

Descriptive analysis using compareGroups

Simple data visualization