# Load the PISA 2015 Vietnam data set.
# NOTE(review): absolute, machine-specific path -- adjust it to wherever the
# workshop data sets are stored.
pisa <- read.csv("G:\\NVT T1_2020\\TDTU Datasets for 2020 Workshop\\PISA Data Vietnam 2015.csv")

# Histogram of the maths score, first with default bar borders, then with
# white borders so that neighbouring bars are visually separated.
hist(pisa$Math, col = "blue")
hist(pisa$Math, col = "blue", border = "white")

# Histogram of the science score with explicit axis labels and title.
# The x label used to contain a pasted console continuation prompt
# (a line break followed by "+ "); it is now the intended single-line label.
hist(pisa$Science,
     col = "blue", border = "white",
     xlab = "Science Score", ylab = "Frequency",
     main = "Distribution of Science Scores")

# Add a density curve to the histogram. The histogram must be drawn on the
# density scale (probability = TRUE) so that both share the same y axis.
hist(pisa$Science, col = "blue", border = "white", probability = TRUE)
lines(density(pisa$Science), col = "red")
# Draw the science-score histograms of boys and girls on top of each other.
# The histograms are computed without plotting first so that they can be
# overlaid afterwards.
p1 <- hist(pisa$Science[pisa$Gender == "Boys"], plot = FALSE)
p2 <- hist(pisa$Science[pisa$Gender == "Girls"], plot = FALSE)

# Axis limits must cover both histograms; otherwise they are taken from the
# first one only and bars of the overlaid histogram may be clipped.
x_range <- range(p1$breaks, p2$breaks)
y_range <- c(0, max(p1$counts, p2$counts))

plot(p1, col = "red", border = "white", xlim = x_range, ylim = y_range)
# Semi-transparent fill keeps the first histogram visible underneath.
plot(p2, add = TRUE, col = scales::alpha("green", 0.5), border = "white")
library(lattice)

# Density of the science score, one curve per gender (no legend).
densityplot(~ Science, data = pisa, groups = Gender)

# Same plot with a legend placed above the panel.
densityplot(
  ~ Science,
  data = pisa,
  groups = Gender,
  auto.key = list(space = "top")
)

# Density of the science score, one curve per area, legend on top.
densityplot(
  ~ Science,
  data = pisa,
  groups = Area,
  auto.key = list(space = "top")
)
# Interpretation of the last plot: the score distributions of the three
# areas are close to a normal distribution.
# Combine three density plots (science, maths, reading) into one figure.
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.5.3

# Only the first panel carries a legend; the grouping (Area) is the same in
# all three panels.
p1 <- densityplot(
  ~ Science,
  data = pisa,
  groups = Area,
  auto.key = list(space = "top")
)
p2 <- densityplot(~ Math, data = pisa, groups = Area)
p3 <- densityplot(~ Read, data = pisa, groups = Area)

# Lay the three lattice plots out side by side.
grid.arrange(p1, p2, p3, ncol = 3)
## Box plots of the science score, drawn in a 1 x 3 panel layout.
# par() returns the previous settings, which are kept so that the layout can
# be restored once the three panels have been drawn.
old_par <- par(mfrow = c(1, 3))
boxplot(pisa$Science, col = "purple")
boxplot(pisa$Science ~ pisa$Gender, col = c("blue", "red"))
boxplot(pisa$Science ~ pisa$Region, col = c("blue", "red", "purple"))
# Restore the previous layout; without this every later base-graphics plot
# would be squeezed into one third of the device.
par(old_par)
# Scatter plot of the science score against PARED, with the fitted
# least-squares regression line drawn in red.
plot(pisa$Science ~ pisa$PARED, col = "blue")
science_fit <- lm(pisa$Science ~ pisa$PARED)
abline(science_fit, col = "red")
# Load the obesity data set and take a first look at its size, its column
# names and the first BMI values. The `##` lines show the recorded output.
obesity_csv <- "G:\\NVT T1_2020\\TDTU Datasets for 2020 Workshop\\obesity data.csv"
ob <- read.csv(file = obesity_csv)

dim(ob)
## [1] 1217 11

names(ob)
## [1] "id" "gender" "height" "weight" "bmi" "age" "WBBMC"
## [8] "wbbmd" "fat" "lean" "pcfat"

head(ob$bmi)
## [1] 21.8 19.1 23.1 21.8 19.9 20.1
# Classify every subject into a BMI group, stored as the factor column OB.
# Intervals are contiguous and closed on the left (right = FALSE):
#   bmi < 18.5        -> Underweight
#   18.5 <= bmi < 25  -> Normal
#   25 <= bmi < 30    -> Overweight
#   bmi >= 30         -> Obese
# The previous rules used upper bounds of 24.9 and 29.9, so values in
# [24.9, 25) and [29.9, 30) matched no rule and were silently left as NA.
# A missing bmi still yields NA.
ob$OB <- cut(
  ob$bmi,
  breaks = c(-Inf, 18.5, 25, 30, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE
)
head(ob$OB)
## [1] Normal Normal Normal Normal Normal Normal
## Levels: Underweight Normal Overweight Obese
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3

# Bar chart of the number of subjects per BMI group, one fill colour per
# group.
p <- ggplot(ob, aes(x = OB, fill = OB)) +
  geom_bar() +
  labs(x = "Obesity group", y = "Frequency")

# The fill legend repeats the x axis labels, so it is hidden when the chart
# is displayed (p itself is stored without this theme tweak).
p + theme(legend.position = "none")
# Plot the relationship between weight and percent body fat (pcfat).
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.5.3
library(gridExtra)

# Scatter plot with a smoothed trend line (geom_smooth() picks its default
# method; see the recorded message below) and a centred title.
p <- ggplot(ob, aes(x = weight, y = pcfat)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Weight",
    y = "Percent body fat",
    title = "Weight and Percent Body Fat"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
p
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Check how gender is coded before using it as a grouping variable.
head(ob$gender)
## [1] F M F F M F
## Levels: F M

# Same weight vs. percent body fat plot, but with points and smoothed trend
# lines coloured (and confidence bands filled) separately for each gender.
p <- ggplot(ob, aes(x = weight, y = pcfat, fill = gender, colour = gender)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Weight",
    y = "Percent body fat",
    title = "Weight and Percent Body Fat for men and women separately"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
p
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'