Làm quen với những package: table1, compareGroups, sjPlot, sjmisc
library(table1); library(compareGroups); library(sjPlot); library(sjmisc)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
## Loading required package: SNPassoc
## Loading required package: haplo.stats
## Loading required package: survival
## Loading required package: mvtnorm
## Loading required package: parallel
## Registered S3 method overwritten by 'SNPassoc':
## method from
## summary.haplo.glm haplo.stats
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
Đọc dữ liệu PISA Data Vietnam 2015.csv vào R và gọi đối tượng là pisa.
pisa = read.csv("C:\\Users\\Nguyen\\Desktop\\TDT Workshop 2020\\TDTU Datasets for 2020 Workshop\\PISA Data Vietnam 2015.csv")
dim(pisa)
## [1] 5826 18
head(pisa)
## School SchoolSize ClassSize STratio SchoolType Area Region Age
## 1 70400001 883 18 22.075 3 URBAN SOUTH 15.58
## 2 70400001 883 18 22.075 3 URBAN SOUTH 15.92
## 3 70400001 883 18 22.075 3 URBAN SOUTH 15.42
## 4 70400001 883 18 22.075 3 URBAN SOUTH 15.58
## 5 70400001 883 18 22.075 3 URBAN SOUTH 15.92
## 6 70400001 883 18 22.075 3 URBAN SOUTH 16.25
## Gender PARED HISCED WEALTH INSTSCIE JOYSCIE ICTRES Math Read
## 1 Boys 9 2 -2.0697 0.9798 2.1635 -1.5244 439.923 412.290
## 2 Boys 12 4 -1.7903 1.7359 2.1635 -1.9305 406.251 409.598
## 3 Girls 9 2 -2.1942 -0.2063 -0.1808 -1.6093 414.369 384.307
## 4 Girls 5 1 -2.0301 -0.3115 -0.4318 -1.6250 468.801 459.104
## 5 Girls 9 2 -1.0522 0.7648 1.3031 -0.5305 355.432 402.435
## 6 Girls 5 1 -3.0570 0.3708 0.5094 -2.5873 458.955 483.885
## Science
## 1 475.612
## 2 450.320
## 3 405.787
## 4 462.968
## 5 453.736
## 6 529.866
summary(pisa)
## School SchoolSize ClassSize STratio
## Min. :70400001 Min. : 113 Min. :13.00 Min. : 4.314
## 1st Qu.:70400052 1st Qu.: 650 1st Qu.:38.00 1st Qu.:14.024
## Median :70400096 Median :1090 Median :38.00 Median :16.627
## Mean :70400097 Mean :1082 Mean :40.57 Mean :16.497
## 3rd Qu.:70400143 3rd Qu.:1419 3rd Qu.:43.00 3rd Qu.:18.983
## Max. :70400188 Max. :4016 Max. :53.00 Max. :38.651
## NA's :34
## SchoolType Area Region Age Gender
## Min. :1.000 REMOTE: 410 CENTRAL:2006 Min. :15.33 Boys :2786
## 1st Qu.:3.000 RURAL :2368 NORTH :1958 1st Qu.:15.50 Girls:3040
## Median :3.000 URBAN :3048 SOUTH :1862 Median :15.75
## Mean :2.849 Mean :15.78
## 3rd Qu.:3.000 3rd Qu.:16.00
## Max. :3.000 Max. :16.25
## NA's :35
## PARED HISCED WEALTH INSTSCIE
## Min. : 3.000 Min. :0.00 Min. :-7.635 Min. :-1.9301
## 1st Qu.: 9.000 1st Qu.:2.00 1st Qu.:-2.829 1st Qu.: 0.0125
## Median : 9.000 Median :2.00 Median :-2.163 Median : 0.3708
## Mean : 9.374 Mean :2.58 Mean :-2.219 Mean : 0.4835
## 3rd Qu.:12.000 3rd Qu.:4.00 3rd Qu.:-1.504 3rd Qu.: 1.0218
## Max. :17.000 Max. :6.00 Max. : 3.211 Max. : 1.7359
## NA's :14 NA's :14 NA's :15 NA's :17
## JOYSCIE ICTRES Math Read
## Min. :-2.1154 Min. :-3.508 Min. :201.7 Min. :107.1
## 1st Qu.: 0.5094 1st Qu.:-2.587 1st Qu.:440.0 1st Qu.:442.5
## Median : 0.5094 Median :-1.855 Median :493.4 Median :489.5
## Mean : 0.6448 Mean :-1.795 Mean :496.1 Mean :489.9
## 3rd Qu.: 1.1049 3rd Qu.:-1.117 3rd Qu.:551.5 3rd Qu.:537.6
## Max. : 2.1635 Max. : 3.497 Max. :820.1 Max. :744.1
## NA's :19 NA's :34
## Science
## Min. :292.7
## 1st Qu.:470.9
## Median :523.9
## Mean :524.8
## 3rd Qu.:574.8
## Max. :807.3
##
Sắp xếp lại thứ tự các nhóm của biến Area bằng hàm factor
table(pisa$Area)
##
## REMOTE RURAL URBAN
## 410 2368 3048
pisa$Area = factor(pisa$Area, levels=c("URBAN", "RURAL", "REMOTE"))
table(pisa$Area)
##
## URBAN RURAL REMOTE
## 3048 2368 410
Tạo biến mới Type. Nếu SchoolType == 1 thì Type = "Private"; nếu SchoolType == 3 thì Type = "Public".
pisa$Type[pisa$SchoolType == 1] = "Private"
pisa$Type[pisa$SchoolType == 3] = "Public"
table(pisa$Type)
##
## Private Public
## 436 5355
Mô tả dữ liệu pisa với package table1
library(table1)
table1(~ WEALTH + PARED + Math + Read + Science | Region, data=pisa)
| | CENTRAL (n=2006) | NORTH (n=1958) | SOUTH (n=1862) | Overall (n=5826) |
|---|---|---|---|---|
| WEALTH | ||||
| Mean (SD) | -2.40 (1.12) | -2.18 (1.18) | -2.06 (1.14) | -2.22 (1.16) |
| Median [Min, Max] | -2.33 [-7.64, 1.41] | -2.14 [-7.64, 2.63] | -2.03 [-7.64, 3.21] | -2.16 [-7.64, 3.21] |
| Missing | 6 (0.3%) | 8 (0.4%) | 1 (0.1%) | 15 (0.3%) |
| PARED | ||||
| Mean (SD) | 9.49 (3.44) | 9.76 (3.51) | 8.85 (3.54) | 9.37 (3.51) |
| Median [Min, Max] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] |
| Missing | 3 (0.1%) | 9 (0.5%) | 2 (0.1%) | 14 (0.2%) |
| Math | ||||
| Mean (SD) | 492 (86.5) | 501 (84.4) | 496 (72.2) | 496 (81.5) |
| Median [Min, Max] | 488 [202, 818] | 500 [251, 820] | 494 [241, 719] | 493 [202, 820] |
| Read | ||||
| Mean (SD) | 488 (74.3) | 489 (72.4) | 493 (64.4) | 490 (70.6) |
| Median [Min, Max] | 486 [233, 744] | 489 [107, 718] | 493 [272, 698] | 489 [107, 744] |
| Science | ||||
| Mean (SD) | 524 (79.8) | 523 (76.6) | 528 (67.3) | 525 (75.0) |
| Median [Min, Max] | 520 [307, 807] | 522 [293, 775] | 528 [337, 761] | 524 [293, 807] |
table1(~ WEALTH + PARED + Math + Read + Science | Area, data=pisa)
| | URBAN (n=3048) | RURAL (n=2368) | REMOTE (n=410) | Overall (n=5826) |
|---|---|---|---|---|
| WEALTH | ||||
| Mean (SD) | -2.12 (1.16) | -2.22 (1.08) | -3.00 (1.25) | -2.22 (1.16) |
| Median [Min, Max] | -2.10 [-7.64, 3.21] | -2.16 [-7.64, 1.43] | -2.83 [-7.64, -0.0430] | -2.16 [-7.64, 3.21] |
| Missing | 2 (0.1%) | 7 (0.3%) | 6 (1.5%) | 15 (0.3%) |
| PARED | ||||
| Mean (SD) | 9.56 (3.48) | 9.38 (3.47) | 7.90 (3.69) | 9.37 (3.51) |
| Median [Min, Max] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] |
| Missing | 1 (0.0%) | 5 (0.2%) | 8 (2.0%) | 14 (0.2%) |
| Math | ||||
| Mean (SD) | 499 (79.3) | 500 (81.9) | 450 (82.0) | 496 (81.5) |
| Median [Min, Max] | 497 [202, 820] | 498 [273, 818] | 446 [216, 696] | 493 [202, 820] |
| Read | ||||
| Mean (SD) | 496 (69.6) | 491 (67.6) | 440 (76.0) | 490 (70.6) |
| Median [Min, Max] | 495 [107, 718] | 490 [292, 744] | 439 [233, 643] | 489 [107, 744] |
| Science | ||||
| Mean (SD) | 527 (72.8) | 529 (75.5) | 482 (74.4) | 525 (75.0) |
| Median [Min, Max] | 525 [293, 799] | 529 [335, 807] | 475 [307, 698] | 524 [293, 807] |
table1(~ WEALTH + PARED + Math + Read + Science | Region*Area, data=pisa)
| | CENTRAL | | | NORTH | | | SOUTH | | | Overall | | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| | URBAN (n=951) | RURAL (n=857) | REMOTE (n=198) | URBAN (n=1046) | RURAL (n=764) | REMOTE (n=148) | URBAN (n=1051) | RURAL (n=747) | REMOTE (n=64) | URBAN (n=3048) | RURAL (n=2368) | REMOTE (n=410) |
| WEALTH | ||||||||||||
| Mean (SD) | -2.60 (1.01) | -2.07 (1.13) | -2.93 (1.21) | -1.90 (1.12) | -2.31 (1.07) | -3.42 (1.23) | -1.89 (1.18) | -2.29 (1.03) | -2.28 (1.07) | -2.12 (1.16) | -2.22 (1.08) | -3.00 (1.25) |
| Median [Min, Max] | -2.54 [-7.64, 0.525] | -1.98 [-5.97, 1.41] | -2.78 [-7.64, -0.0430] | -1.90 [-5.97, 2.63] | -2.25 [-7.64, 1.43] | -3.22 [-7.64, -0.884] | -1.81 [-5.97, 3.21] | -2.24 [-7.64, 0.918] | -2.17 [-5.64, -0.265] | -2.10 [-7.64, 3.21] | -2.16 [-7.64, 1.43] | -2.83 [-7.64, -0.0430] |
| Missing | 0 (0%) | 1 (0.1%) | 5 (2.5%) | 2 (0.2%) | 5 (0.7%) | 1 (0.7%) | 0 (0%) | 1 (0.1%) | 0 (0%) | 2 (0.1%) | 7 (0.3%) | 6 (1.5%) |
| PARED | ||||||||||||
| Mean (SD) | 9.02 (3.06) | 10.3 (3.57) | 8.04 (3.67) | 10.4 (3.61) | 9.36 (3.09) | 7.43 (3.70) | 9.26 (3.56) | 8.29 (3.41) | 8.54 (3.64) | 9.56 (3.48) | 9.38 (3.47) | 7.90 (3.69) |
| Median [Min, Max] | 9.00 [3.00, 17.0] | 12.0 [3.00, 17.0] | 9.00 [3.00, 17.0] | 12.0 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] | 9.00 [3.00, 17.0] |
| Missing | 1 (0.1%) | 1 (0.1%) | 1 (0.5%) | 0 (0%) | 3 (0.4%) | 6 (4.1%) | 0 (0%) | 1 (0.1%) | 1 (1.6%) | 1 (0.0%) | 5 (0.2%) | 8 (2.0%) |
| Math | ||||||||||||
| Mean (SD) | 489 (75.8) | 509 (91.9) | 429 (81.0) | 507 (88.3) | 500 (77.0) | 464 (83.5) | 500 (71.7) | 490 (73.0) | 482 (63.5) | 499 (79.3) | 500 (81.9) | 450 (82.0) |
| Median [Min, Max] | 488 [202, 794] | 501 [273, 818] | 419 [216, 649] | 502 [251, 820] | 504 [286, 729] | 461 [298, 679] | 500 [241, 719] | 490 [293, 677] | 484 [350, 696] | 497 [202, 820] | 498 [273, 818] | 446 [216, 696] |
| Read | ||||||||||||
| Mean (SD) | 484 (63.6) | 506 (75.8) | 430 (83.4) | 498 (77.0) | 484 (61.8) | 446 (71.6) | 504 (65.6) | 481 (60.1) | 458 (55.0) | 496 (69.6) | 491 (67.6) | 440 (76.0) |
| Median [Min, Max] | 483 [288, 718] | 504 [316, 744] | 417 [233, 643] | 496 [107, 718] | 487 [305, 655] | 448 [261, 624] | 506 [321, 698] | 482 [292, 697] | 466 [272, 572] | 495 [107, 718] | 490 [292, 744] | 439 [233, 643] |
| Science | ||||||||||||
| Mean (SD) | 521 (67.3) | 539 (86.6) | 469 (79.5) | 528 (82.1) | 522 (67.9) | 494 (73.2) | 532 (67.1) | 526 (67.7) | 491 (52.4) | 527 (72.8) | 529 (75.5) | 482 (74.4) |
| Median [Min, Max] | 518 [330, 799] | 532 [347, 807] | 455 [307, 658] | 523 [293, 775] | 526 [335, 717] | 491 [366, 698] | 530 [337, 761] | 528 [344, 722] | 488 [378, 640] | 525 [293, 799] | 529 [335, 807] | 475 [307, 698] |
Mô tả dữ liệu pisa với package compareGroups
library(compareGroups)
t = compareGroups(Area ~ WEALTH + PARED + Math + Read + Science, data=pisa)
createTable(t)
##
## --------Summary descriptives table by 'Area'---------
##
## ________________________________________________________
## URBAN RURAL REMOTE p.overall
## N=3048 N=2368 N=410
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## WEALTH -2.12 (1.16) -2.22 (1.08) -3.00 (1.25) <0.001
## PARED 9.56 (3.48) 9.38 (3.47) 7.90 (3.69) <0.001
## Math 499 (79.3) 500 (81.9) 450 (82.0) <0.001
## Read 496 (69.6) 491 (67.6) 440 (76.0) <0.001
## Science 527 (72.8) 529 (75.5) 482 (74.4) <0.001
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯