# Absolute location of the arrest data set (CSV file).
t <- "E:\\2. NCS hồ sơ- tiến trình học 2024\\8.1. Viết báo quốc tế\\45. Lớp học phân tích dữ liệu. Đại học Văn Lang. T12.2022\\Arrest data.csv"
# Read the CSV into the data frame `arr`, then preview its first rows.
arr <- read.csv(file = t)
head(arr)
##   id week arrest finance age  race work     married parole prior educ
## 1  1   20      1      no  27 black   no not married    yes     3    3
## 2  2   17      1      no  18 black   no not married    yes     8    4
## 3  3   25      1      no  19 other  yes not married    yes    13    3
## 4  4   52      0     yes  23 black  yes     married    yes     1    5
## 5  5   52      0      no  19 other  yes not married    yes     3    3
## 6  6   52      0      no  24 black  yes not married     no     2    4

Việc 4: Thông tin về dữ liệu arr

Tìm hiểu số dòng / cột

# Number of rows and columns in arr.
dim(arr)
## [1] 432  11

Trình bày 6 dòng đầu

# First six rows of arr (six is also the default for head()).
head(arr, n = 6)
##   id week arrest finance age  race work     married parole prior educ
## 1  1   20      1      no  27 black   no not married    yes     3    3
## 2  2   17      1      no  18 black   no not married    yes     8    4
## 3  3   25      1      no  19 other  yes not married    yes    13    3
## 4  4   52      0     yes  23 black  yes     married    yes     1    5
## 5  5   52      0      no  19 other  yes not married    yes     3    3
## 6  6   52      0      no  24 black  yes not married     no     2    4

Trình bày 6 dòng cuối

# Last six rows of arr (six is also the default for tail()).
tail(arr, n = 6)
##      id week arrest finance age  race work     married parole prior educ
## 427 427   12      1     yes  22 black  yes     married    yes     2    4
## 428 428   52      0     yes  31 other  yes not married    yes     3    3
## 429 429   52      0      no  20 black   no not married    yes     1    4
## 430 430   52      0     yes  20 black  yes     married    yes     1    3
## 431 431   52      0      no  29 black  yes not married    yes     3    4
## 432 432   52      0     yes  24 black  yes not married    yes     1    4

Việc 5: Tạo biến số mới arrest1

# Recode the 0/1 column `arrest` into the text label column `arrest1`.
# match() gives position 1 for 0 and position 2 for 1, which selects "No" and
# "Yes" respectively; any other value (including NA) becomes NA.
# The whole column is rebuilt in one assignment, so re-running this step can
# never leave labels from an earlier run behind.
arr$arrest1 <- c("No", "Yes")[match(arr$arrest, c(0, 1))]
# Cross-tabulate the new labels against the source column to check the recode.
table(arr$arrest1, arr$arrest)
##      
##         0   1
##   No  318   0
##   Yes   0 114
# Preview the first six rows to confirm the new arrest1 column.
head(arr, n = 6)
##   id week arrest finance age  race work     married parole prior educ arrest1
## 1  1   20      1      no  27 black   no not married    yes     3    3     Yes
## 2  2   17      1      no  18 black   no not married    yes     8    4     Yes
## 3  3   25      1      no  19 other  yes not married    yes    13    3     Yes
## 4  4   52      0     yes  23 black  yes     married    yes     1    5      No
## 5  5   52      0      no  19 other  yes not married    yes     3    3      No
## 6  6   52      0      no  24 black  yes not married     no     2    4      No

Tạo biến số mới fin

# Recode finance ("yes"/"no") into the indicator column fin ("1"/"0").
# Pitfall avoided: with the form `arr$fin[cond] = 1`, R first reads `arr$fin`.
# While no `fin` column exists, `$` partially matches that name to `finance`,
# so the new column silently starts as a copy of finance: it becomes character
# rather than numeric, and any finance value other than "yes"/"no" leaks into
# fin unchanged.
# Building the column in a single assignment never reads `arr$fin`. It is kept
# as character so summary() and table1() still treat fin as categorical;
# finance values other than "yes"/"no" become NA.
arr$fin <- c("0", "1")[match(arr$finance, c("no", "yes"))]

# Preview the first six rows to confirm the new fin column.
head(arr, n = 6)
##   id week arrest finance age  race work     married parole prior educ arrest1
## 1  1   20      1      no  27 black   no not married    yes     3    3     Yes
## 2  2   17      1      no  18 black   no not married    yes     8    4     Yes
## 3  3   25      1      no  19 other  yes not married    yes    13    3     Yes
## 4  4   52      0     yes  23 black  yes     married    yes     1    5      No
## 5  5   52      0      no  19 other  yes not married    yes     3    3      No
## 6  6   52      0      no  24 black  yes not married     no     2    4      No
##   fin
## 1   0
## 2   0
## 3   0
## 4   1
## 5   0
## 6   0

Việc 6: Tóm tắt dữ liệu

# Per-column summary of every variable in arr.
summary(object = arr)
##        id             week           arrest         finance         
##  Min.   :  1.0   Min.   : 1.00   Min.   :0.0000   Length:432        
##  1st Qu.:108.8   1st Qu.:50.00   1st Qu.:0.0000   Class :character  
##  Median :216.5   Median :52.00   Median :0.0000   Mode  :character  
##  Mean   :216.5   Mean   :45.85   Mean   :0.2639                     
##  3rd Qu.:324.2   3rd Qu.:52.00   3rd Qu.:1.0000                     
##  Max.   :432.0   Max.   :52.00   Max.   :1.0000                     
##       age           race               work             married         
##  Min.   :17.0   Length:432         Length:432         Length:432        
##  1st Qu.:20.0   Class :character   Class :character   Class :character  
##  Median :23.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :24.6                                                           
##  3rd Qu.:27.0                                                           
##  Max.   :44.0                                                           
##     parole              prior             educ         arrest1         
##  Length:432         Min.   : 0.000   Min.   :2.000   Length:432        
##  Class :character   1st Qu.: 1.000   1st Qu.:3.000   Class :character  
##  Mode  :character   Median : 2.000   Median :3.000   Mode  :character  
##                     Mean   : 2.984   Mean   :3.477                     
##                     3rd Qu.: 4.000   3rd Qu.:4.000                     
##                     Max.   :18.000   Max.   :6.000                     
##      fin           
##  Length:432        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Việc 7: Tóm tắt bằng hàm table1

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Descriptive summary table for the whole sample (no stratification).
table1(
  ~ week + arrest + arrest1 + age + work + married + parole + prior + educ +
    fin,
  data = arr
)
Overall (N=432)
week
Mean (SD) 45.9 (12.7)
Median [Min, Max] 52.0 [1.00, 52.0]
arrest
Mean (SD) 0.264 (0.441)
Median [Min, Max] 0 [0, 1.00]
arrest1
No 318 (73.6%)
Yes 114 (26.4%)
age
Mean (SD) 24.6 (6.11)
Median [Min, Max] 23.0 [17.0, 44.0]
work
no 185 (42.8%)
yes 247 (57.2%)
married
married 53 (12.3%)
not married 379 (87.7%)
parole
no 165 (38.2%)
yes 267 (61.8%)
prior
Mean (SD) 2.98 (2.90)
Median [Min, Max] 2.00 [0, 18.0]
educ
Mean (SD) 3.48 (0.834)
Median [Min, Max] 3.00 [2.00, 6.00]
fin
0 216 (50.0%)
1 216 (50.0%)
# Descriptive summary table with one column per level of finance.
table1(
  ~ week + arrest + arrest1 + age + work + married + parole + prior + educ |
    finance,
  data = arr
)
no (N=216) yes (N=216) Overall (N=432)
week
Mean (SD) 44.8 (13.5) 46.9 (11.7) 45.9 (12.7)
Median [Min, Max] 52.0 [1.00, 52.0] 52.0 [7.00, 52.0] 52.0 [1.00, 52.0]
arrest
Mean (SD) 0.306 (0.462) 0.222 (0.417) 0.264 (0.441)
Median [Min, Max] 0 [0, 1.00] 0 [0, 1.00] 0 [0, 1.00]
arrest1
No 150 (69.4%) 168 (77.8%) 318 (73.6%)
Yes 66 (30.6%) 48 (22.2%) 114 (26.4%)
age
Mean (SD) 24.2 (5.73) 25.0 (6.47) 24.6 (6.11)
Median [Min, Max] 23.0 [17.0, 44.0] 23.0 [17.0, 44.0] 23.0 [17.0, 44.0]
work
no 93 (43.1%) 92 (42.6%) 185 (42.8%)
yes 123 (56.9%) 124 (57.4%) 247 (57.2%)
married
married 29 (13.4%) 24 (11.1%) 53 (12.3%)
not married 187 (86.6%) 192 (88.9%) 379 (87.7%)
parole
no 81 (37.5%) 84 (38.9%) 165 (38.2%)
yes 135 (62.5%) 132 (61.1%) 267 (61.8%)
prior
Mean (SD) 2.99 (2.92) 2.98 (2.88) 2.98 (2.90)
Median [Min, Max] 2.00 [0, 18.0] 2.00 [0, 15.0] 2.00 [0, 18.0]
educ
Mean (SD) 3.44 (0.844) 3.52 (0.824) 3.48 (0.834)
Median [Min, Max] 3.00 [2.00, 6.00] 3.00 [2.00, 6.00] 3.00 [2.00, 6.00]