Task 1: Cài đặt và gọi các package

Làm quen với những package: table1, compareGroups, sjPlot, sjmisc

# Attach the four packages introduced in this task, one call per line.
library(table1)
library(compareGroups)
library(sjPlot)
library(sjmisc)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
## Loading required package: SNPassoc
## Loading required package: haplo.stats
## Loading required package: survival
## Loading required package: mvtnorm
## Loading required package: parallel
## Registered S3 method overwritten by 'SNPassoc':
##   method            from       
##   summary.haplo.glm haplo.stats
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

Task 2: Đọc dữ liệu

# Read the PISA 2015 Vietnam data set into `pisa`.
# stringsAsFactors = TRUE keeps the text columns (Area, Region, Gender) as
# factors, which is what the summary() output below displays; since R 4.0.0
# read.csv() would otherwise return them as character vectors.
# NOTE(review): this absolute Windows path presumably exists only on the
# author's machine -- point it at your own copy of the CSV.
pisa <- read.csv(
  "C:\\Users\\Nguyen\\Desktop\\TDT Workshop 2020\\TDTU Datasets for 2020 Workshop\\PISA Data Vietnam 2015.csv",
  stringsAsFactors = TRUE
)
# Number of rows and columns that were read.
dim(pisa)
## [1] 5826   18
# Preview the first six rows (the default row count, spelled out).
head(pisa, n = 6L)
##     School SchoolSize ClassSize STratio SchoolType  Area Region   Age
## 1 70400001        883        18  22.075          3 URBAN  SOUTH 15.58
## 2 70400001        883        18  22.075          3 URBAN  SOUTH 15.92
## 3 70400001        883        18  22.075          3 URBAN  SOUTH 15.42
## 4 70400001        883        18  22.075          3 URBAN  SOUTH 15.58
## 5 70400001        883        18  22.075          3 URBAN  SOUTH 15.92
## 6 70400001        883        18  22.075          3 URBAN  SOUTH 16.25
##   Gender PARED HISCED  WEALTH INSTSCIE JOYSCIE  ICTRES    Math    Read
## 1   Boys     9      2 -2.0697   0.9798  2.1635 -1.5244 439.923 412.290
## 2   Boys    12      4 -1.7903   1.7359  2.1635 -1.9305 406.251 409.598
## 3  Girls     9      2 -2.1942  -0.2063 -0.1808 -1.6093 414.369 384.307
## 4  Girls     5      1 -2.0301  -0.3115 -0.4318 -1.6250 468.801 459.104
## 5  Girls     9      2 -1.0522   0.7648  1.3031 -0.5305 355.432 402.435
## 6  Girls     5      1 -3.0570   0.3708  0.5094 -2.5873 458.955 483.885
##   Science
## 1 475.612
## 2 450.320
## 3 405.787
## 4 462.968
## 5 453.736
## 6 529.866
# Per-column summary of the data frame; columns with missing values get an
# extra "NA's" row in the printed output.
summary(pisa)
##      School           SchoolSize     ClassSize        STratio      
##  Min.   :70400001   Min.   : 113   Min.   :13.00   Min.   : 4.314  
##  1st Qu.:70400052   1st Qu.: 650   1st Qu.:38.00   1st Qu.:14.024  
##  Median :70400096   Median :1090   Median :38.00   Median :16.627  
##  Mean   :70400097   Mean   :1082   Mean   :40.57   Mean   :16.497  
##  3rd Qu.:70400143   3rd Qu.:1419   3rd Qu.:43.00   3rd Qu.:18.983  
##  Max.   :70400188   Max.   :4016   Max.   :53.00   Max.   :38.651  
##                                    NA's   :34                      
##    SchoolType        Area          Region          Age          Gender    
##  Min.   :1.000   REMOTE: 410   CENTRAL:2006   Min.   :15.33   Boys :2786  
##  1st Qu.:3.000   RURAL :2368   NORTH  :1958   1st Qu.:15.50   Girls:3040  
##  Median :3.000   URBAN :3048   SOUTH  :1862   Median :15.75               
##  Mean   :2.849                                Mean   :15.78               
##  3rd Qu.:3.000                                3rd Qu.:16.00               
##  Max.   :3.000                                Max.   :16.25               
##  NA's   :35                                                               
##      PARED            HISCED         WEALTH          INSTSCIE      
##  Min.   : 3.000   Min.   :0.00   Min.   :-7.635   Min.   :-1.9301  
##  1st Qu.: 9.000   1st Qu.:2.00   1st Qu.:-2.829   1st Qu.: 0.0125  
##  Median : 9.000   Median :2.00   Median :-2.163   Median : 0.3708  
##  Mean   : 9.374   Mean   :2.58   Mean   :-2.219   Mean   : 0.4835  
##  3rd Qu.:12.000   3rd Qu.:4.00   3rd Qu.:-1.504   3rd Qu.: 1.0218  
##  Max.   :17.000   Max.   :6.00   Max.   : 3.211   Max.   : 1.7359  
##  NA's   :14       NA's   :14     NA's   :15       NA's   :17       
##     JOYSCIE            ICTRES            Math            Read      
##  Min.   :-2.1154   Min.   :-3.508   Min.   :201.7   Min.   :107.1  
##  1st Qu.: 0.5094   1st Qu.:-2.587   1st Qu.:440.0   1st Qu.:442.5  
##  Median : 0.5094   Median :-1.855   Median :493.4   Median :489.5  
##  Mean   : 0.6448   Mean   :-1.795   Mean   :496.1   Mean   :489.9  
##  3rd Qu.: 1.1049   3rd Qu.:-1.117   3rd Qu.:551.5   3rd Qu.:537.6  
##  Max.   : 2.1635   Max.   : 3.497   Max.   :820.1   Max.   :744.1  
##  NA's   :19        NA's   :34                                      
##     Science     
##  Min.   :292.7  
##  1st Qu.:470.9  
##  Median :523.9  
##  Mean   :524.8  
##  3rd Qu.:574.8  
##  Max.   :807.3  
## 
# Frequency of each Area category before the levels are reordered.
table(pisa[["Area"]])
## 
## REMOTE  RURAL  URBAN 
##    410   2368   3048
# Re-create Area as a factor with an explicit level order
# (URBAN, RURAL, REMOTE), then tabulate again to confirm the new order.
pisa[["Area"]] <- factor(
  pisa[["Area"]],
  levels = c("URBAN", "RURAL", "REMOTE")
)
table(pisa[["Area"]])
## 
##  URBAN  RURAL REMOTE 
##   3048   2368    410

Task 3: Mã hóa (coding)

# Recode the numeric SchoolType into a character label column `Type`:
# code 1 becomes "Private", code 3 becomes "Public". Any other value,
# including a missing SchoolType, stays NA.
pisa$Type <- unname(
  c("1" = "Private", "3" = "Public")[as.character(pisa$SchoolType)]
)
# Count the labels (table() leaves NA out by default).
table(pisa$Type)
## 
## Private  Public 
##     436    5355

Task 4: Phân tích mô tả pisa với package table1

library(table1)
# Descriptive table of the five listed variables, one column per Region.
table1(
  ~ WEALTH + PARED + Math + Read + Science | Region,
  data = pisa
)
| | CENTRAL (n=2006) | NORTH (n=1958) | SOUTH (n=1862) | Overall (n=5826) |
|---|---|---|---|---|
| **WEALTH** | | | | |
| Mean (SD) | -2.40 (1.12) | -2.18 (1.18) | -2.06 (1.14) | -2.22 (1.16) |
| Median [Min, Max] | -2.33 [-7.64, 1.41] | -2.14 [-7.64, 2.63] | -2.03 [-7.64, 3.21] | -2.16 [-7.64, 3.21] |
| Missing | 6 (0.3%) | 8 (0.4%) | 1 (0.1%) | 15 (0.3%) |
| **PARED** | | | | |
| Mean (SD) | 9.49 (3.44) | 9.76 (3.51) | 8.85 (3.54) | 9.37 (3.51) |
| Median [Min, Max] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] |
| Missing | 3 (0.1%) | 9 (0.5%) | 2 (0.1%) | 14 (0.2%) |
| **Math** | | | | |
| Mean (SD) | 492 (86.5) | 501 (84.4) | 496 (72.2) | 496 (81.5) |
| Median [Min, Max] | 488 [202, 818] | 500 [251, 820] | 494 [241, 719] | 493 [202, 820] |
| **Read** | | | | |
| Mean (SD) | 488 (74.3) | 489 (72.4) | 493 (64.4) | 490 (70.6) |
| Median [Min, Max] | 486 [233, 744] | 489 [107, 718] | 493 [272, 698] | 489 [107, 744] |
| **Science** | | | | |
| Mean (SD) | 524 (79.8) | 523 (76.6) | 528 (67.3) | 525 (75.0) |
| Median [Min, Max] | 520 [307, 807] | 522 [293, 775] | 528 [337, 761] | 524 [293, 807] |
# Same descriptive table, this time with one column per Area.
table1(
  ~ WEALTH + PARED + Math + Read + Science | Area,
  data = pisa
)
| | URBAN (n=3048) | RURAL (n=2368) | REMOTE (n=410) | Overall (n=5826) |
|---|---|---|---|---|
| **WEALTH** | | | | |
| Mean (SD) | -2.12 (1.16) | -2.22 (1.08) | -3.00 (1.25) | -2.22 (1.16) |
| Median [Min, Max] | -2.10 [-7.64, 3.21] | -2.16 [-7.64, 1.43] | -2.83 [-7.64, -0.0430] | -2.16 [-7.64, 3.21] |
| Missing | 2 (0.1%) | 7 (0.3%) | 6 (1.5%) | 15 (0.3%) |
| **PARED** | | | | |
| Mean (SD) | 9.56 (3.48) | 9.38 (3.47) | 7.90 (3.69) | 9.37 (3.51) |
| Median [Min, Max] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] |
| Missing | 1 (0.0%) | 5 (0.2%) | 8 (2.0%) | 14 (0.2%) |
| **Math** | | | | |
| Mean (SD) | 499 (79.3) | 500 (81.9) | 450 (82.0) | 496 (81.5) |
| Median [Min, Max] | 497 [202, 820] | 498 [273, 818] | 446 [216, 696] | 493 [202, 820] |
| **Read** | | | | |
| Mean (SD) | 496 (69.6) | 491 (67.6) | 440 (76.0) | 490 (70.6) |
| Median [Min, Max] | 495 [107, 718] | 490 [292, 744] | 439 [233, 643] | 489 [107, 744] |
| **Science** | | | | |
| Mean (SD) | 527 (72.8) | 529 (75.5) | 482 (74.4) | 525 (75.0) |
| Median [Min, Max] | 525 [293, 799] | 529 [335, 807] | 475 [307, 698] | 524 [293, 807] |
# Descriptive table split by Region and, within each Region, by Area.
table1(
  ~ WEALTH + PARED + Math + Read + Science | Region * Area,
  data = pisa
)
| | CENTRAL / URBAN (n=951) | CENTRAL / RURAL (n=857) | CENTRAL / REMOTE (n=198) | NORTH / URBAN (n=1046) | NORTH / RURAL (n=764) | NORTH / REMOTE (n=148) | SOUTH / URBAN (n=1051) | SOUTH / RURAL (n=747) | SOUTH / REMOTE (n=64) | Overall / URBAN (n=3048) | Overall / RURAL (n=2368) | Overall / REMOTE (n=410) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| **WEALTH** | | | | | | | | | | | | |
| Mean (SD) | -2.60 (1.01) | -2.07 (1.13) | -2.93 (1.21) | -1.90 (1.12) | -2.31 (1.07) | -3.42 (1.23) | -1.89 (1.18) | -2.29 (1.03) | -2.28 (1.07) | -2.12 (1.16) | -2.22 (1.08) | -3.00 (1.25) |
| Median [Min, Max] | -2.54 [-7.64, 0.525] | -1.98 [-5.97, 1.41] | -2.78 [-7.64, -0.0430] | -1.90 [-5.97, 2.63] | -2.25 [-7.64, 1.43] | -3.22 [-7.64, -0.884] | -1.81 [-5.97, 3.21] | -2.24 [-7.64, 0.918] | -2.17 [-5.64, -0.265] | -2.10 [-7.64, 3.21] | -2.16 [-7.64, 1.43] | -2.83 [-7.64, -0.0430] |
| Missing | 0 (0%) | 1 (0.1%) | 5 (2.5%) | 2 (0.2%) | 5 (0.7%) | 1 (0.7%) | 0 (0%) | 1 (0.1%) | 0 (0%) | 2 (0.1%) | 7 (0.3%) | 6 (1.5%) |
| **PARED** | | | | | | | | | | | | |
| Mean (SD) | 9.02 (3.06) | 10.3 (3.57) | 8.04 (3.67) | 10.4 (3.61) | 9.36 (3.09) | 7.43 (3.70) | 9.26 (3.56) | 8.29 (3.41) | 8.54 (3.64) | 9.56 (3.48) | 9.38 (3.47) | 7.90 (3.69) |
| Median [Min, Max] | 9.00 [3.00, 17.0] | 12.0 [3.00, 17.0] | 9.00 [3.00, 17.0] | 12.0 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] |
| Missing | 1 (0.1%) | 1 (0.1%) | 1 (0.5%) | 0 (0%) | 3 (0.4%) | 6 (4.1%) | 0 (0%) | 1 (0.1%) | 1 (1.6%) | 1 (0.0%) | 5 (0.2%) | 8 (2.0%) |
| **Math** | | | | | | | | | | | | |
| Mean (SD) | 489 (75.8) | 509 (91.9) | 429 (81.0) | 507 (88.3) | 500 (77.0) | 464 (83.5) | 500 (71.7) | 490 (73.0) | 482 (63.5) | 499 (79.3) | 500 (81.9) | 450 (82.0) |
| Median [Min, Max] | 488 [202, 794] | 501 [273, 818] | 419 [216, 649] | 502 [251, 820] | 504 [286, 729] | 461 [298, 679] | 500 [241, 719] | 490 [293, 677] | 484 [350, 696] | 497 [202, 820] | 498 [273, 818] | 446 [216, 696] |
| **Read** | | | | | | | | | | | | |
| Mean (SD) | 484 (63.6) | 506 (75.8) | 430 (83.4) | 498 (77.0) | 484 (61.8) | 446 (71.6) | 504 (65.6) | 481 (60.1) | 458 (55.0) | 496 (69.6) | 491 (67.6) | 440 (76.0) |
| Median [Min, Max] | 483 [288, 718] | 504 [316, 744] | 417 [233, 643] | 496 [107, 718] | 487 [305, 655] | 448 [261, 624] | 506 [321, 698] | 482 [292, 697] | 466 [272, 572] | 495 [107, 718] | 490 [292, 744] | 439 [233, 643] |
| **Science** | | | | | | | | | | | | |
| Mean (SD) | 521 (67.3) | 539 (86.6) | 469 (79.5) | 528 (82.1) | 522 (67.9) | 494 (73.2) | 532 (67.1) | 526 (67.7) | 491 (52.4) | 527 (72.8) | 529 (75.5) | 482 (74.4) |
| Median [Min, Max] | 518 [330, 799] | 532 [347, 807] | 455 [307, 658] | 523 [293, 775] | 526 [335, 717] | 491 [366, 698] | 530 [337, 761] | 528 [344, 722] | 488 [378, 640] | 525 [293, 799] | 529 [335, 807] | 475 [307, 698] |

Task 5: Phân tích mô tả pisa với package compareGroups

library(compareGroups)
# Describe the five listed variables within each Area group and store the
# result in `t`.
# NOTE(review): `t` shares its name with base::t(); the name is kept in case
# later code refers to it.
t <- compareGroups(
  Area ~ WEALTH + PARED + Math + Read + Science,
  data = pisa
)
# Print the comparison as a table (the printed output includes a p.overall
# column).
createTable(t)
## 
## --------Summary descriptives table by 'Area'---------
## 
## ________________________________________________________ 
##            URBAN        RURAL        REMOTE    p.overall 
##            N=3048       N=2368       N=410               
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## WEALTH  -2.12 (1.16) -2.22 (1.08) -3.00 (1.25)  <0.001   
## PARED   9.56 (3.48)  9.38 (3.47)  7.90 (3.69)   <0.001   
## Math     499 (79.3)   500 (81.9)   450 (82.0)   <0.001   
## Read     496 (69.6)   491 (67.6)   440 (76.0)   <0.001   
## Science  527 (72.8)   529 (75.5)   482 (74.4)   <0.001   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯