# title: "Baitap2_06.01.2020"
# author: "hocuong68swic"
# date: "07/01/2020"

library(lattice)
library(gridExtra)
library(ggplot2)
library(ggthemes)
library(ggExtra)
library(GGally)

# Folder holding the input CSV files. Paths are built with file.path() rather
# than setwd(), so the script never changes the caller's working directory.
data_dir <- "E:/cuong_dataR/test"

# ---- Simple chart analysis ----

# Task 1: distribution plots with hist()
# Columns are always reached through `pisa$...` or `data = pisa`, so the data
# frame is deliberately not attach()ed.
pisa <- read.csv(
  file.path(data_dir, "PISA Data Vietnam 2015.csv"),
  header = TRUE
)
names(pisa)

# Experiment with hist()
hist(pisa$Math, col = "blue")
hist(pisa$Math, col = "blue", border = "white")
hist(
  pisa$Science,
  col = "blue",
  border = "white",
  xlab = "Science Score",
  ylab = "Frequency",
  main = "Distribution of Science Scores"
)

# Add a density curve; the histogram must be on the density scale for the
# curve to line up with the bars.
hist(pisa$Science, col = "blue", border = "white", probability = TRUE)
lines(density(pisa$Science, na.rm = TRUE), col = "red")

# Overlay the two distributions for boys and girls
hist_boys <- hist(pisa$Science[pisa$Gender == "Boys"], plot = FALSE)
hist_girls <- hist(pisa$Science[pisa$Gender == "Girls"], plot = FALSE)
plot(hist_boys, col = "skyblue", border = "white")
plot(
  hist_girls,
  add = TRUE,
  col = scales::alpha("green", 0.5), # semi-transparent so both stay visible
  border = "white"
)

# Task 2: distribution plots with the lattice package
# Density plots for several groups using densityplot()
densityplot(~Science, groups = Gender, data = pisa)
densityplot(~Science, groups = Area, data = pisa)
densityplot(~Science, groups = Area, data = pisa, auto.key = list(space = "top"))

# Three panels side by side using lattice and gridExtra
p1 <- densityplot(
  ~Science,
  groups = Area, data = pisa, auto.key = list(space = "top")
)
p2 <- densityplot(~Math, groups = Area, data = pisa)
p3 <- densityplot(~Read, groups = Area, data = pisa)
grid.arrange(p1, p2, p3, ncol = 3)

# Task 3: box plots with boxplot()
# Split the device into 3 panels
par(mfrow = c(1, 3))
boxplot(pisa$Science, col = "purple")
boxplot(Science ~ Gender, data = pisa, col = c("blue", "red"))
boxplot(Science ~ Region, data = pisa, col = c("blue", "red", "purple"))

# Task 4: scatter plot with plot()
# Explore the relationship between PARED and the Science score
par(mfrow = c(1, 1)) # back to a single panel
plot(Science ~ PARED, data = pisa, col = "blue")
# Same scatter plot again, this time with the fitted regression line
plot(Science ~ PARED, data = pisa, col = "blue")
abline(lm(Science ~ PARED, data = pisa), col = "red")

# ---- High-quality charts with ggplot2 ----

# Task 5: correlation plots
ob <- read.csv(file.path(data_dir, "Obesity data.csv"), header = TRUE)
names(ob)

# Create the new variable OB from bmi.
# The intervals are half-open [lower, upper) and contiguous, so every
# non-missing bmi falls in exactly one group. The previous cut-offs
# (< 24.9, >= 25.0, < 29.9, >= 30) left values in [24.9, 25) and [29.9, 30)
# unclassified (NA).
ob$OB <- cut(
  ob$bmi,
  breaks = c(-Inf, 18.5, 25, 30, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE
)

# Explore the distribution of OB
p <- ggplot(data = ob, aes(OB, fill = OB)) + geom_bar()
p <- p + xlab("Obesity group") + ylab("Frequency")
# A complete theme replaces every earlier theme() setting, so the legend is
# switched off after theme_classic() rather than before it.
p + theme_classic() + theme(legend.position = "none")

# Task 6: relationship between weight and pcfat using ggplot2
# Simple plot
p <- ggplot(data = ob, aes(x = weight, y = pcfat))
p <- p + geom_point() + geom_smooth()
p <- p + xlab("Weight") + ylab("Percent body fat") +
  ggtitle("Weight and Percent Body Fat")
p <- p + theme(plot.title = element_text(hjust = 0.5))
p

# Plot by gender group
p <- ggplot(data = ob, aes(x = weight, y = pcfat, fill = gender, col = gender))
p <- p + geom_point() + geom_smooth()
p <- p + xlab("Weight") + ylab("Percent body fat") +
  ggtitle("Weight and Percent Body Fat for men and women separately")
p <- p + theme(plot.title = element_text(hjust = 0.5))
# Note: each ggthemes theme below is a complete theme, so it applies its own
# title placement in place of the centring set above.
p + theme_economist()

# Try theme_tufte(), theme_few(), theme_wsj(), theme_clean(), theme_hc()
p + theme_tufte()
p + theme_few()
p + theme_wsj()
p + theme_clean()
p + theme_hc()

# Scatter plot with marginal distributions using the ggExtra package
p <- ggplot(data = ob, aes(x = bmi, y = pcfat, fill = gender, col = gender))
p <- p + geom_point() + geom_smooth()
ggMarginal(p, type = "histogram", groupColour = TRUE, groupFill = TRUE)

# Experiment with type ("density", "violin", "boxplot")
ggMarginal(p, type = "density", groupColour = TRUE, groupFill = TRUE)

# Multivariate correlation plots using the GGally package
# Select the variables of interest
dat <- ob[, c("gender", "age", "bmi", "weight", "height", "pcfat")]
ggpairs(dat)

# Add colour by group
ggpairs(data = dat, mapping = aes(color = gender))

# Compare with: pick the columns straight from `ob` instead of subsetting first
ggpairs(
  data = ob,
  mapping = aes(color = gender),
  columns = c("age", "weight", "bmi", "pcfat")
)

# Task 7: distribution plots (histograms)
# Release `ob` from the search path only if it is actually attached; an
# unconditional detach() errors when it is not. `pisa` is not attach()ed here
# because the plots that follow pass `data = pisa` explicitly.
if ("ob" %in% search()) {
  detach("ob", character.only = TRUE)
}
library(gridExtra)

# Distribution of Science scores
# Count histogram next to a density-scaled histogram with a density curve.
# Both panels share the same base plot object.
# NOTE(review): the `..density..` / `..count..` notation is kept as written;
# newer ggplot2 releases provide after_stat() for the same purpose.
p <- ggplot(data = pisa, aes(x = Science))
p1 <- p + geom_histogram(color = "white", fill = "blue")
p2 <- p +
  geom_histogram(aes(y = ..density..), color = "white", fill = "blue") +
  geom_density(col = "red")
grid.arrange(p1, p2, ncol = 2)

# Science score distribution by Area
p1 <- ggplot(data = pisa, aes(x = Science, fill = Area)) +
  geom_histogram(position = "dodge")
p2 <- ggplot(data = pisa, aes(x = Science, fill = Area, color = Area)) +
  geom_density(alpha = 0.1)
grid.arrange(p1, p2, nrow = 2)

# Histogram and cumulative probability
# Bars show each HISCED value's share of all rows; the red step line is the
# empirical cumulative distribution.
p <- ggplot(data = pisa, aes(HISCED)) +
  stat_ecdf(color = "red", lwd = 1) +
  geom_bar(
    aes(y = (..count..) / sum(..count..)),
    fill = "blue",
    colour = "yellow"
  )
p

# Task 8: box plots
# Box plot by Area, without and with the raw points jittered on top
p <- ggplot(data = pisa, aes(x = Area, y = Science, col = Area, fill = Area))
p1 <- p + geom_boxplot(col = "black")
p2 <- p + geom_boxplot(col = "black") + geom_jitter(alpha = 0.05)
grid.arrange(p1, p2, ncol = 2)

# Box plot by economic status (original heading; the variable used is PARED)
# Left panel: PARED mapped as-is.
# Right panel: PARED converted to a factor so each value gets its own box.
# NOTE(review): na.omit(pisa) drops rows with an NA in any column, not only
# PARED - confirm that is intended.
p <- ggplot(data = pisa, aes(x = PARED, y = Science, fill = PARED))
p1 <- p + geom_boxplot(col = "black") + geom_jitter(alpha = 0.02)
p <- ggplot(
  data = na.omit(pisa),
  aes(
    x = factor(PARED),
    y = Science,
    fill = factor(PARED),
    color = factor(PARED)
  )
)
p2 <- p + geom_boxplot(col = "black") + geom_jitter(alpha = 0.05)
grid.arrange(p1, p2, ncol = 2)