# Task 1: Open R

# Task 2: Read data

# Load the crime incident records and take a first look at them.
# NOTE(review): the reported row count (1048575) is exactly Excel's sheet
# limit minus one header row, so the CSV may have been truncated on export
# -- confirm against the original data source.
crime <- read.csv(
  file = "C:\\Users\\E7450\\Desktop\\R\\Datasets cho thuc hanh CR 8-2019\\Crime data 2003to2018.csv"
)
dim(crime)  # number of rows, number of columns
## [1] 1048575       8
head(crime, n = 6L)  # first six records
##          ID       Category                           Description     Day
## 1 180362289  VEHICLE THEFT                     STOLEN MOTORCYCLE Tuesday
## 2 180360948   NON-CRIMINAL          AIDED CASE, MENTAL DISTURBED Tuesday
## 3 180360879 OTHER OFFENSES                      PAROLE VIOLATION Tuesday
## 4 180360879 OTHER OFFENSES              TRAFFIC VIOLATION ARREST Tuesday
## 5 180360879 OTHER OFFENSES                     TRAFFIC VIOLATION Tuesday
## 6 180360829 OTHER OFFENSES DRIVERS LICENSE, SUSPENDED OR REVOKED Tuesday
##         Date  Time District     Resolution
## 1 05/15/2018 10:30 SOUTHERN           NONE
## 2 05/15/2018  4:14 SOUTHERN           NONE
## 3 05/15/2018  2:01  MISSION ARREST, BOOKED
## 4 05/15/2018  2:01  MISSION ARREST, BOOKED
## 5 05/15/2018  2:01  MISSION ARREST, BOOKED
## 6 05/15/2018  1:27  MISSION           NONE
# Frequency plots for the crime data; plot_frq() is provided by sjPlot.
library(sjPlot)

# How often each offence category occurs.
sjPlot::plot_frq(crime$Category)

# Incidents per weekday, with Day exactly as it was read from the CSV ...
sjPlot::plot_frq(crime$Day)

# ... and again after pinning the level order to Monday -> Sunday, so the
# categories follow the calendar week.
crime$Day <- factor(
  crime$Day,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
sjPlot::plot_frq(crime$Day)

# Task 3: Coding

# Map BMI values onto four weight categories using contiguous, half-open
# bins:
#   underweight: bmi <  18.5
#   normal:      18.5 <= bmi < 25
#   overweight:  25   <= bmi < 30
#   obese:       bmi >= 30
#
# The previous hand-written conditions used upper bounds of 24.9 and 29.9,
# which left values in [24.9, 25) and [29.9, 30) unclassified (NA).
#
# bmi:     numeric vector of BMI values; NA stays NA.
# returns: factor with levels underweight < normal < overweight < obese
#          (in that level order, unordered factor), same length as `bmi`.
classify_bmi <- function(bmi) {
  cut(
    bmi,
    breaks = c(-Inf, 18.5, 25, 30, Inf),
    labels = c("underweight", "normal", "overweight", "obese"),
    right = FALSE  # intervals are [lower, upper), so 25 -> "overweight"
  )
}

# Read the obesity data, derive the category column OB from bmi, and plot
# how many subjects fall into each category.
ob <- read.csv(
  "C:\\Users\\E7450\\Desktop\\R\\Datasets cho thuc hanh CR 8-2019\\Obesity data.csv"
)
ob$OB <- classify_bmi(ob$bmi)
sjPlot::plot_frq(ob$OB)

# Task 4: Merge data

# Read the two PISA files and join them on the school identifier CNTSCHID.
# merge() with its defaults keeps only rows whose CNTSCHID occurs in both
# tables. summary() then prints per-column descriptive statistics.
# NOTE(review): the summary output lists both HEDRES and HEDRES.1 with
# identical statistics -- presumably a duplicated column in one input file;
# confirm and drop one if so.
students <- read.csv(
  file = "C:\\Users\\E7450\\Desktop\\R\\Datasets cho thuc hanh CR 8-2019\\PISA VN 2015.csv"
)
schools <- read.csv(
  file = "C:\\Users\\E7450\\Desktop\\R\\Datasets cho thuc hanh CR 8-2019\\PISA VN SCHOOLS 2015.csv"
)
pisa <- merge(x = students, y = schools, by = "CNTSCHID")
summary(object = pisa)
##     CNTSCHID             AGE          Gender         PARED       
##  Min.   :70400001   Min.   :15.33   Boys :2786   Min.   : 3.000  
##  1st Qu.:70400052   1st Qu.:15.50   Girls:3040   1st Qu.: 9.000  
##  Median :70400096   Median :15.75                Median : 9.000  
##  Mean   :70400097   Mean   :15.78                Mean   : 9.374  
##  3rd Qu.:70400143   3rd Qu.:16.00                3rd Qu.:12.000  
##  Max.   :70400188   Max.   :16.25                Max.   :17.000  
##                                                  NA's   :14      
##      HEDRES            MISCED          FISCED          HISCED    
##  Min.   :-4.3706   Min.   :0.000   Min.   :0.000   Min.   :0.00  
##  1st Qu.:-1.5169   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.00  
##  Median :-1.1119   Median :2.000   Median :2.000   Median :2.00  
##  Mean   :-1.0470   Mean   :2.069   Mean   :2.296   Mean   :2.58  
##  3rd Qu.:-0.7026   3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:4.00  
##  Max.   : 1.1767   Max.   :6.000   Max.   :6.000   Max.   :6.00  
##  NA's   :20        NA's   :34      NA's   :88      NA's   :14    
##      WEALTH            ESCS           INSTSCIE          SCIEEFF       
##  Min.   :-7.635   Min.   :-5.657   Min.   :-1.9301   Min.   :-3.7565  
##  1st Qu.:-2.829   1st Qu.:-2.539   1st Qu.: 0.0125   1st Qu.:-0.8585  
##  Median :-2.163   Median :-1.982   Median : 0.3708   Median :-0.3473  
##  Mean   :-2.219   Mean   :-1.822   Mean   : 0.4835   Mean   :-0.2662  
##  3rd Qu.:-1.504   3rd Qu.:-1.221   3rd Qu.: 1.0218   3rd Qu.: 0.3155  
##  Max.   : 3.211   Max.   : 1.950   Max.   : 1.7359   Max.   : 3.2775  
##  NA's   :15       NA's   :1        NA's   :17        NA's   :19       
##     JOYSCIE            ICTRES          HOMEPOS          HEDRES.1      
##  Min.   :-2.1154   Min.   :-3.508   Min.   :-8.955   Min.   :-4.3706  
##  1st Qu.: 0.5094   1st Qu.:-2.587   1st Qu.:-2.669   1st Qu.:-1.5169  
##  Median : 0.5094   Median :-1.855   Median :-2.047   Median :-1.1119  
##  Mean   : 0.6448   Mean   :-1.795   Mean   :-2.042   Mean   :-1.0470  
##  3rd Qu.: 1.1049   3rd Qu.:-1.117   3rd Qu.:-1.354   3rd Qu.:-0.7026  
##  Max.   : 2.1635   Max.   : 3.497   Max.   : 2.770   Max.   : 1.1767  
##  NA's   :19        NA's   :34       NA's   :2        NA's   :20       
##     CULTPOSS          PV1MATH         PV1READ         PV1SCIE     
##  Min.   :-1.8413   Min.   :201.7   Min.   :107.1   Min.   :292.7  
##  1st Qu.:-0.8310   1st Qu.:440.0   1st Qu.:442.5   1st Qu.:470.9  
##  Median :-0.4113   Median :493.4   Median :489.5   Median :523.9  
##  Mean   :-0.4444   Mean   :496.1   Mean   :489.9   Mean   :524.8  
##  3rd Qu.: 0.1157   3rd Qu.:551.5   3rd Qu.:537.6   3rd Qu.:574.8  
##  Max.   : 2.1655   Max.   :820.1   Max.   :744.1   Max.   :807.3  
##  NA's   :52                                                       
##     STRATUM       SCHSIZE         CLSIZE         STRATIO      
##  VNM0313:989   Min.   : 113   Min.   :13.00   Min.   : 4.314  
##  VNM0208:884   1st Qu.: 650   1st Qu.:38.00   1st Qu.:14.024  
##  VNM0101:806   Median :1090   Median :38.00   Median :16.627  
##  VNM0207:790   Mean   :1082   Mean   :40.57   Mean   :16.497  
##  VNM0102:764   3rd Qu.:1419   3rd Qu.:43.00   3rd Qu.:18.983  
##  VNM0314:679   Max.   :4016   Max.   :53.00   Max.   :38.651  
##  (Other):914                  NA's   :34                      
##     SCHLTYPE         Region         Area     
##  Min.   :1.000   CENTRAL:2006   REMOTE: 410  
##  1st Qu.:3.000   NORTH  :1958   RURAL :2368  
##  Median :3.000   SOUTH  :1862   URBAN :3048  
##  Mean   :2.849                               
##  3rd Qu.:3.000                               
##  Max.   :3.000                               
##  NA's   :35
# Frequency plot of the merged PISA records by Region.
sjPlot::plot_frq(pisa$Region)

# Frequency plot of the merged PISA records by Area.
sjPlot::plot_frq(pisa$Area)

# Task 5: Describe “pisa” with “table1”

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Rebuild the merged PISA table and describe the wealth, parental-education,
# home-resource and PV1 score columns, with one column per Region.
# NOTE(review): this re-reads and re-merges the same two files as the merge
# step earlier in the file; if `pisa` is still in the session the three
# lines below are redundant -- confirm before removing.
students <- read.csv(
  file = "C:\\Users\\E7450\\Desktop\\R\\Datasets cho thuc hanh CR 8-2019\\PISA VN 2015.csv"
)
schools <- read.csv(
  file = "C:\\Users\\E7450\\Desktop\\R\\Datasets cho thuc hanh CR 8-2019\\PISA VN SCHOOLS 2015.csv"
)
pisa <- merge(x = students, y = schools, by = "CNTSCHID")
table1(
  ~ WEALTH + PARED + HEDRES + PV1MATH + PV1SCIE + PV1READ | Region,
  data = pisa
)
CENTRAL
(n=2006)
NORTH
(n=1958)
SOUTH
(n=1862)
Overall
(n=5826)
WEALTH
Mean (SD) -2.40 (1.12) -2.18 (1.18) -2.06 (1.14) -2.22 (1.16)
Median [Min, Max] -2.33 [-7.64, 1.41] -2.14 [-7.64, 2.63] -2.03 [-7.64, 3.21] -2.16 [-7.64, 3.21]
Missing 6 (0.3%) 8 (0.4%) 1 (0.1%) 15 (0.3%)
PARED
Mean (SD) 9.49 (3.44) 9.76 (3.51) 8.85 (3.54) 9.37 (3.51)
Median [Min, Max] 9.00 [3.00, 17.0] 9.00 [3.00, 17.0] 9.00 [3.00, 17.0] 9.00 [3.00, 17.0]
Missing 3 (0.1%) 9 (0.5%) 2 (0.1%) 14 (0.2%)
HEDRES
Mean (SD) -1.04 (0.955) -1.06 (0.930) -1.04 (0.941) -1.05 (0.942)
Median [Min, Max] -1.11 [-4.37, 1.18] -1.11 [-4.37, 1.16] -1.11 [-4.37, 1.16] -1.11 [-4.37, 1.18]
Missing 7 (0.3%) 7 (0.4%) 6 (0.3%) 20 (0.3%)
PV1MATH
Mean (SD) 492 (86.5) 501 (84.4) 496 (72.2) 496 (81.5)
Median [Min, Max] 488 [202, 818] 500 [251, 820] 494 [241, 719] 493 [202, 820]
PV1SCIE
Mean (SD) 524 (79.8) 523 (76.6) 528 (67.3) 525 (75.0)
Median [Min, Max] 520 [307, 807] 522 [293, 775] 528 [337, 761] 524 [293, 807]
PV1READ
Mean (SD) 488 (74.3) 489 (72.4) 493 (64.4) 490 (70.6)
Median [Min, Max] 486 [233, 744] 489 [107, 718] 493 [272, 698] 489 [107, 744]
# Same descriptive table as above, this time with one column per Area.
table1(
  ~ WEALTH + PARED + HEDRES + PV1MATH + PV1SCIE + PV1READ | Area,
  data = pisa
)
REMOTE
(n=410)
RURAL
(n=2368)
URBAN
(n=3048)
Overall
(n=5826)
WEALTH
Mean (SD) -3.00 (1.25) -2.22 (1.08) -2.12 (1.16) -2.22 (1.16)
Median [Min, Max] -2.83 [-7.64, -0.0430] -2.16 [-7.64, 1.43] -2.10 [-7.64, 3.21] -2.16 [-7.64, 3.21]
Missing 6 (1.5%) 7 (0.3%) 2 (0.1%) 15 (0.3%)
PARED
Mean (SD) 7.90 (3.69) 9.38 (3.47) 9.56 (3.48) 9.37 (3.51)
Median [Min, Max] 9.00 [3.00, 17.0] 9.00 [3.00, 17.0] 9.00 [3.00, 17.0] 9.00 [3.00, 17.0]
Missing 8 (2.0%) 5 (0.2%) 1 (0.0%) 14 (0.2%)
HEDRES
Mean (SD) -1.66 (1.12) -0.985 (0.946) -1.01 (0.883) -1.05 (0.942)
Median [Min, Max] -1.44 [-4.37, 1.16] -1.11 [-4.37, 1.18] -1.11 [-4.37, 1.16] -1.11 [-4.37, 1.18]
Missing 6 (1.5%) 9 (0.4%) 5 (0.2%) 20 (0.3%)
PV1MATH
Mean (SD) 450 (82.0) 500 (81.9) 499 (79.3) 496 (81.5)
Median [Min, Max] 446 [216, 696] 498 [273, 818] 497 [202, 820] 493 [202, 820]
PV1SCIE
Mean (SD) 482 (74.4) 529 (75.5) 527 (72.8) 525 (75.0)
Median [Min, Max] 475 [307, 698] 529 [335, 807] 525 [293, 799] 524 [293, 807]
PV1READ
Mean (SD) 440 (76.0) 491 (67.6) 496 (69.6) 490 (70.6)
Median [Min, Max] 439 [233, 643] 490 [292, 744] 495 [107, 718] 489 [107, 744]