Read the data from the CSV file and save it into the pisa object
# Location of the PISA 2015 Vietnam CSV file; adjust this path for your machine.
t <- "C:\\Users\\Thuan Nguyen\\OneDrive\\R in BUH\\Dataset thuc hanh\\PISA Data Vietnam 2015.csv"
# stringsAsFactors = TRUE keeps text columns such as Area, Region and Gender as
# factors, which the per-level counts in the summary output rely on. Since
# R 4.0 read.csv() would otherwise import them as plain character vectors.
pisa <- read.csv(t, stringsAsFactors = TRUE)
Show the number of rows and columns of pisa by using the command “dim(pisa)”
## [1] 5826 18
Summarize the data with the command “summary(pisa)”
## School SchoolSize ClassSize STratio
## Min. :70400001 Min. : 113 Min. :13.00 Min. : 4.314
## 1st Qu.:70400052 1st Qu.: 650 1st Qu.:38.00 1st Qu.:14.024
## Median :70400096 Median :1090 Median :38.00 Median :16.627
## Mean :70400097 Mean :1082 Mean :40.57 Mean :16.497
## 3rd Qu.:70400143 3rd Qu.:1419 3rd Qu.:43.00 3rd Qu.:18.983
## Max. :70400188 Max. :4016 Max. :53.00 Max. :38.651
## NA's :34
## SchoolType Area Region Age Gender
## Min. :1.000 REMOTE: 410 CENTRAL:2006 Min. :15.33 Boys :2786
## 1st Qu.:3.000 RURAL :2368 NORTH :1958 1st Qu.:15.50 Girls:3040
## Median :3.000 URBAN :3048 SOUTH :1862 Median :15.75
## Mean :2.849 Mean :15.78
## 3rd Qu.:3.000 3rd Qu.:16.00
## Max. :3.000 Max. :16.25
## NA's :35
## PARED HISCED WEALTH INSTSCIE
## Min. : 3.000 Min. :0.00 Min. :-7.635 Min. :-1.9301
## 1st Qu.: 9.000 1st Qu.:2.00 1st Qu.:-2.829 1st Qu.: 0.0125
## Median : 9.000 Median :2.00 Median :-2.163 Median : 0.3708
## Mean : 9.374 Mean :2.58 Mean :-2.219 Mean : 0.4835
## 3rd Qu.:12.000 3rd Qu.:4.00 3rd Qu.:-1.504 3rd Qu.: 1.0218
## Max. :17.000 Max. :6.00 Max. : 3.211 Max. : 1.7359
## NA's :14 NA's :14 NA's :15 NA's :17
## JOYSCIE ICTRES Math Read
## Min. :-2.1154 Min. :-3.508 Min. :201.7 Min. :107.1
## 1st Qu.: 0.5094 1st Qu.:-2.587 1st Qu.:440.0 1st Qu.:442.5
## Median : 0.5094 Median :-1.855 Median :493.4 Median :489.5
## Mean : 0.6448 Mean :-1.795 Mean :496.1 Mean :489.9
## 3rd Qu.: 1.1049 3rd Qu.:-1.117 3rd Qu.:551.5 3rd Qu.:537.6
## Max. : 2.1635 Max. : 3.497 Max. :820.1 Max. :744.1
## NA's :19 NA's :34
## Science
## Min. :292.7
## 1st Qu.:470.9
## Median :523.9
## Mean :524.8
## 3rd Qu.:574.8
## Max. :807.3
##
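Find the range expected to contain 95% of the Science scores (mean ± 1.96 standard deviations). Note that this is a reference range for individual scores, not a confidence interval for the mean, which would use the standard error s/sqrt(n) instead of s.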
# Centre and spread of the Science scores.
m <- mean(pisa$Science)
s <- sd(pisa$Science)

# Bounds at mean -/+ 1.96 standard deviations: under approximate normality
# about 95% of individual scores fall between them. (These are not the
# standard-error-based limits of a confidence interval for the mean.)
L95 <- m - 1.96 * s
U95 <- m + 1.96 * s

# Print lower bound, mean and upper bound together.
c(L95, m, U95)
## [1] 377.9063 524.8112 671.7161
Draw the histogram
# Histogram of Math scores; bar heights are the number of students per bin.
hist(
  pisa$Math,
  col = "dark green",
  border = "white",
  xlab = "Math Score",
  ylab = "Number of Student",
  main = "Distribution of Math Scores"
)

Change the y-axis from counts to density (so that the bar areas sum to 1)
# Histogram of Math scores on the density scale. freq = FALSE rescales the
# bars so their total area is 1, so the y-axis is labelled "Density" rather
# than a student count. It is spelled out instead of `prob = T` because `T`
# can be reassigned and `prob` depends on partial argument matching.
hist(
  pisa$Math,
  col = "dark green",
  border = "white",
  xlab = "Math Score",
  ylab = "Density",
  main = "Distribution of Math Scores",
  freq = FALSE
)

Overlay the estimated density curve as a dashed red line
# Histogram of Math scores on the density scale (freq = FALSE), so that the
# kernel density curve drawn afterwards shares the same y-axis; the axis is
# therefore labelled "Density" rather than a student count.
hist(
  pisa$Math,
  col = "dark green",
  border = "white",
  xlab = "Math Score",
  ylab = "Density",
  main = "Distribution of Math Scores",
  freq = FALSE
)
# Dashed red kernel density estimate on top of the bars. na.rm = TRUE drops
# missing scores: hist() ignores them silently, but density() would stop with
# an error if any were present.
lines(density(pisa$Math, na.rm = TRUE), col = "red", lwd = 2, lty = 2)

Frequency table and bar plot of Area
# Count the students in each Area category, then draw one coloured bar per
# category.
tab <- table(pisa$Area)
barplot(height = tab, col = c("red", "blue", "green"))

Boxplot of Math scores
# Single box summarising the distribution of all Math scores.
boxplot(x = pisa$Math, col = "green")

Boxplots of Math scores across the three Area groups
# One box of Math scores per Area category. The colour order matches the Area
# bar plot (red, blue, green), so each category keeps the same colour in both
# figures. data = pisa with explicit labels gives readable axis titles instead
# of the raw `pisa$...` expressions.
boxplot(
  Math ~ Area,
  data = pisa,
  col = c("red", "blue", "green"),
  xlab = "Area",
  ylab = "Math Score"
)

Plot the relationship between Science scores and PARED, and between Science and Math scores
# Scatter plot of Science score against PARED.
plot(pisa$Science ~ pisa$PARED, col = "red")

# The same relationship shown as one box of Science scores per PARED value.
boxplot(pisa$Science ~ pisa$PARED, col = "red")

# Scatter plot of Science score against Math score.
plot(pisa$Science ~ pisa$Math, col = "red")
