# Use the "C" locale for every locale category.
Sys.setlocale("LC_ALL", "C")

# Packages used by the course notebooks. unique() drops the repeated
# "Matrix" entry so no package can be requested for installation twice.
packages <- unique(c(
  "dplyr", "ggplot2", "d3heatmap", "googleVis", "devtools", "plotly", "xgboost",
  "magrittr", "caTools", "ROCR", "corrplot", "rpart", "rpart.plot",
  "doParallel", "caret", "glmnet", "Matrix", "e1071", "randomForest",
  "flexclust", "FactoMineR", "factoextra", "maps", "ggmap", "igraph", "rgl",
  "tm", "SnowballC", "wordcloud", "slam", "Matrix", "RColorBrewer"
))

# Names of the packages already present in the local libraries.
existing <- as.character(installed.packages()[, "Package"])

# Install only what is missing.
for (pkg in setdiff(packages, existing)) {
  install.packages(pkg)
}
# Clear every object from the workspace, including names that start with a
# dot. `all.names = TRUE` is spelled out instead of `all=T`: `T` is an
# ordinary variable that can be rebound, and `all` relied on partial
# argument matching.
rm(list = ls(all.names = TRUE))

# Print numbers with 4 significant digits and bias output towards fixed
# (non-scientific) notation.
options(digits = 4, scipen = 12)
library(dplyr)
library(ggplot2)
library(maps)
library(ggmap)
7.1 ggplot2 繪圖套件
7.1.1 基本點狀圖
# Read the WHO data set from the data folder.
WHO <- read.csv(file = "data/WHO.csv")

# Scatter plot of fertility rate against GNI with base R graphics.
plot(x = WHO$GNI, y = WHO$FertilityRate)

library(ggplot2)

# Base ggplot object: binds the data and maps GNI to x, FertilityRate to y.
# No geometry is attached yet.
scatterplot <- ggplot(data = WHO, mapping = aes(x = GNI, y = FertilityRate))

# Draw the observations as points.
scatterplot + geom_point()
- Compare the output of the base plot() function with that of ggplot().
- Notice the difference in logic between the two approaches.
# Connect the observations with a line instead of drawing points.
scatterplot + geom_line()

# Back to points.
scatterplot + geom_point()

# Blue triangles instead of the default circles. Shape 17 is the filled
# triangle; the earlier value 21 is a circle and contradicted this intent.
scatterplot + geom_point(color = "blue", size = 3, shape = 17)
- Set the color, size, and shape of the points through the arguments of geom_point().
# Alternative look: dark red points of size 3 drawn with shape 8.
scatterplot + geom_point(shape = 8, size = 3, colour = "darkred")

# Blue points of size 3 and shape 17, with a title on top of the plot.
scatterplot +
  ggtitle("Fertility Rate vs. Gross National Income") +
  geom_point(shape = 17, size = 3, color = "blue")
7.1.2 儲存圖檔
# Keep the titled scatter plot in a variable so it can be written to a file.
fertilityGNIplot <- scatterplot +
  geom_point(colour = "blue", size = 3, shape = 17) +
  ggtitle("Fertility Rate vs. Gross National Income")

# Render the plot into MyPlot.pdf. The device is closed in `finally`, so it
# does not stay open when printing the plot raises an error. invisible()
# keeps the plot from being auto-printed a second time on the default device.
pdf("MyPlot.pdf")
invisible(tryCatch(
  print(fertilityGNIplot),
  finally = dev.off()
))
7.1.3 圖形元件屬性
# Map Region to the point color so that each region is drawn in its own color.
ggplot(
  data = WHO,
  mapping = aes(x = GNI, y = FertilityRate, colour = Region)
) +
  geom_point()
- Use color to show a third variable in the graph.
# Map LifeExpectancy to the point color.
ggplot(
  data = WHO,
  mapping = aes(x = GNI, y = FertilityRate, colour = LifeExpectancy)
) +
  geom_point()
- Use color to express differences across a numeric range.
# Is a country's fertility rate a good predictor of the percentage of its
# population under 15?
ggplot(data = WHO, mapping = aes(x = FertilityRate, y = Under15)) +
  geom_point()
- Inspect the shape of the relationship between the two variables.
7.1.4 數值尺度比例轉換
# Plot Under15 against the natural log of FertilityRate; the transformation
# is applied directly inside aes().
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point()
- Apply a log transformation to variables that span a large scale.
7.1.5 回歸趨勢線
# Fit a simple linear regression that predicts the percentage of the
# population under 15 from the log of the fertility rate, then print its
# summary.
mod <- lm(formula = Under15 ~ log(FertilityRate), data = WHO)
summary(mod)

# Overlay a linear-model trend line on the scatter plot.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm")
- Add the regression line with stat_smooth(method = "lm").
7.1.6 趨勢線的信賴區間
# Trend line with a 99% confidence band.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", level = 0.99)
- Set the confidence level with stat_smooth(method = "lm", level = 0.99).
# Trend line only: se = FALSE turns the confidence band off.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)

# Draw the regression line in orange.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", color = "orange")
7.1.7 分群點狀圖
# quiz-1: scatter plot colored by Region, using the "Accent" Brewer palette.
ggplot(
  data = WHO,
  mapping = aes(x = FertilityRate, y = Under15, colour = Region)
) +
  geom_point() +
  scale_color_brewer(palette = "Accent")
7.1.8 分格點狀圖
# quiz-1: GNI against log(Population), one panel per Region, each with its
# own linear trend line, drawn with the black-and-white theme.
ggplot(
  data = WHO,
  mapping = aes(x = log(Population), y = GNI, colour = Region)
) +
  geom_point() +
  stat_smooth(method = "lm") +
  facet_wrap(~Region) +
  theme_bw()
- Plot 6 graphs, one for each of the 6 regions.
LS0tDQp0aXRsZTogIkFTNy0wQSBnZ3Bsb3QyIOe5quWcluWll+S7tiINCmF1dGhvcjogIuW8teWPoeWTsiBNMDY0MTExMDMzLCAyMDE4LzA4LzAyIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KPGJyPg0KDQpgYGB7cn0NClN5cy5zZXRsb2NhbGUoIkxDX0FMTCIsIkMiKQ0KcGFja2FnZXMgPSBjKA0KICAiZHBseXIiLCJnZ3Bsb3QyIiwiZDNoZWF0bWFwIiwiZ29vZ2xlVmlzIiwiZGV2dG9vbHMiLCJwbG90bHkiLCAieGdib29zdCIsDQogICJtYWdyaXR0ciIsImNhVG9vbHMiLCJST0NSIiwiY29ycnBsb3QiLCAicnBhcnQiLCAicnBhcnQucGxvdCIsDQogICJkb1BhcmFsbGVsIiwgImNhcmV0IiwgImdsbW5ldCIsICJNYXRyaXgiLCAiZTEwNzEiLCAicmFuZG9tRm9yZXN0IiwNCiAgImZsZXhjbHVzdCIsICJGYWN0b01pbmVSIiwgImZhY3RvZXh0cmEiLCAibWFwcyIsICJnZ21hcCIsICJpZ3JhcGgiLCAicmdsIiwNCiAgInRtIiwgIlNub3diYWxsQyIsICJ3b3JkY2xvdWQiLCAic2xhbSIsICJNYXRyaXgiLCAiUkNvbG9yQnJld2VyIg0KICApDQpleGlzdGluZyA9IGFzLmNoYXJhY3RlcihpbnN0YWxsZWQucGFja2FnZXMoKVssMV0pDQpmb3IocGtnIGluIHBhY2thZ2VzWyEocGFja2FnZXMgJWluJSBleGlzdGluZyldKSBpbnN0YWxsLnBhY2thZ2VzKHBrZykNCmBgYA0KDQpgYGB7ciBlY2hvPVQsIG1lc3NhZ2U9RiwgY2FjaGU9Riwgd2FybmluZz1GfQ0Kcm0obGlzdD1scyhhbGw9VCkpDQpvcHRpb25zKGRpZ2l0cz00LCBzY2lwZW49MTIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShtYXBzKQ0KbGlicmFyeShnZ21hcCkNCmBgYA0KDQotIC0gLQ0KDQojIyMgNy4xIGBnZ3Bsb3QyYCDnuarlnJblpZfku7YNCg0KIyMjIyMgNy4xLjEg5Z+65pys6bue54uA5ZyWDQpgYGB7cn0NCldITyA9IHJlYWQuY3N2KCJkYXRhL1dITy5jc3YiKQ0KYGBgDQoNCmBgYHtyfQ0KIyBCYXNpYyBQbG90IGluIFIgDQpwbG90KFdITyRHTkksIFdITyRGZXJ0aWxpdHlSYXRlKQ0KYGBgDQoNCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KIyBDcmVhdGUgdGhlIGdncGxvdCBvYmplY3Qgd2l0aCB0aGUgZGF0YSBhbmQgdGhlIGFlc3RoZXRpYyBtYXBwaW5nOg0Kc2NhdHRlcnBsb3QgPSBnZ3Bsb3QoV0hPLCBhZXMoeCA9IEdOSSwgeSA9IEZlcnRpbGl0eVJhdGUpKQ0KYGBgDQoNCmBgYHtyfQ0KIyBBZGQgdGhlIGdlb21fcG9pbnQgZ2VvbWV0cnkNCnNjYXR0ZXJwbG90ICsgZ2VvbV9wb2ludCgpIA0KYGBgDQoNCisgY29tcGFyZSB0aGUgZGlmZmVyZW5jZSBiZXR3ZWVuIGJhc2ljIHBsb3QgZnVuY3Rpb24gYW5kIGdncGxvdCBmdW5jdGlvbi4NCisgIG5vdGljZSB0aGUgZGlmZmVyZW5jZSBvZiBsb2dpYyBiZXR3ZWVuIHRob3NlIDIgcGFja2FnZXMuDQoNCmBgYHtyfQ0KIyBNYWtlIGEgbGluZSBncmFwaCBpbnN0ZWFkOg0Kc2NhdHRlcnBsb3QgKyBnZW9tX2xpbmUoKQ0KYGBgDQoNCmBgYHtyfQ0KIyBTd2l0Y2ggYmFjayB0byBv
dXIgcG9pbnRzOg0Kc2NhdHRlcnBsb3QgKyBnZW9tX3BvaW50KCkNCmBgYA0KDQpgYGB7cn0NCiMgUmVkbyB0aGUgcGxvdCB3aXRoIGJsdWUgdHJpYW5nbGVzIGluc3RlYWQgb2YgY2lyY2xlczoNCnNjYXR0ZXJwbG90ICsgZ2VvbV9wb2ludChjb2xvciA9ICJibHVlIiwgc2l6ZSA9IDMsIHNoYXBlID0gMjEpDQpgYGANCg0KKyBhZGQgdGhlIGZlYXR1cmUgb2YgY29sb3IgYW5kIHRoZSBzaXplIG9mIHBsb3QgYXQgZ2VvbV9wb2ludA0KDQpgYGB7cn0NCiMgQW5vdGhlciBvcHRpb246DQpzY2F0dGVycGxvdCArIGdlb21fcG9pbnQoY29sb3IgPSAiZGFya3JlZCIsIHNpemUgPSAzLCBzaGFwZSA9IDgpDQpgYGANCg0KYGBge3J9DQojIEFkZCBhIHRpdGxlIHRvIHRoZSBwbG90Og0Kc2NhdHRlcnBsb3QgKyANCiAgZ2VvbV9wb2ludChjb2xvdXIgPSAiYmx1ZSIsIHNpemUgPSAzLCBzaGFwZSA9IDE3KSArIA0KICBnZ3RpdGxlKCJGZXJ0aWxpdHkgUmF0ZSB2cy4gR3Jvc3MgTmF0aW9uYWwgSW5jb21lIikNCmBgYA0KDQoNCiMjIyMjIDcuMS4yIOWEsuWtmOWcluaqlA0KYGBge3J9DQojIFNhdmUgb3VyIHBsb3Q6DQpmZXJ0aWxpdHlHTklwbG90ID0gc2NhdHRlcnBsb3QgKyANCiAgZ2VvbV9wb2ludChjb2xvdXIgPSAiYmx1ZSIsIHNpemUgPSAzLCBzaGFwZSA9IDE3KSArIA0KICBnZ3RpdGxlKCJGZXJ0aWxpdHkgUmF0ZSB2cy4gR3Jvc3MgTmF0aW9uYWwgSW5jb21lIikNCg0KcGRmKCJNeVBsb3QucGRmIikNCnByaW50KGZlcnRpbGl0eUdOSXBsb3QpDQpkZXYub2ZmKCkNCg0KYGBgDQoNCiMjIyMjIDcuMS4zIOWcluW9ouWFg+S7tuWxrOaApw0KYGBge3J9DQojIENvbG9yIHRoZSBwb2ludHMgYnkgcmVnaW9uOg0KZ2dwbG90KFdITywgYWVzKHggPSBHTkksIHkgPSBGZXJ0aWxpdHlSYXRlLCBjb2xvciA9IFJlZ2lvbikpICsgDQogIGdlb21fcG9pbnQoKQ0KYGBgDQoNCisgdXNlIGNvbG9yIGFzIHRoZSAzcmQgdmFyaWFibGUgc2hvd24gaW4gdGhlIGdyYWdoLg0KDQpgYGB7cn0NCiMgQ29sb3IgdGhlIHBvaW50cyBhY2NvcmRpbmcgdG8gbGlmZSBleHBlY3RhbmN5Og0KZ2dwbG90KFdITywgYWVzKHggPSBHTkksIHkgPSBGZXJ0aWxpdHlSYXRlLCBjb2xvciA9IExpZmVFeHBlY3RhbmN5KSkgKyANCiAgZ2VvbV9wb2ludCgpDQpgYGANCg0KKyBjb2xvciB0byBleHByZXNzIHRoZSBkaWZmZXJlbmNlIG9mIG51bWVyaWMgcmFuZ2UuDQoNCmBgYHtyfQ0KIyBJcyB0aGUgZmVydGlsaXR5IHJhdGUgb2YgYSBjb3VudHJ5IHdhcyBhIGdvb2QgcHJlZGljdG9yIG9mIHRoZSANCiMgcGVyY2VudGFnZSBvZiB0aGUgcG9wdWxhdGlvbiB1bmRlciAxNT8NCmdncGxvdChXSE8sIGFlcyh4ID0gRmVydGlsaXR5UmF0ZSwgeSA9IFVuZGVyMTUpKSArIGdlb21fcG9pbnQoKQ0KYGBgDQoNCisgc2VlIHRoZSBzaGFwZSBvZiBjb3JyZWxhdGlvbiBvZiAyIHZhcmlhYmxlcy4NCg0KIyMjIyMgNy4xLjQg5pW45YC85bC65bqm5q+U5L6L6L2J5o+bDQpgYGB7cn0NCiMgTGV0J3Mg
dHJ5IGEgbG9nIHRyYW5zZm9ybWF0aW9uOg0KZ2dwbG90KFdITywgYWVzKHggPSBsb2coRmVydGlsaXR5UmF0ZSksIHkgPSBVbmRlcjE1KSkgKyBnZW9tX3BvaW50KCkNCmBgYA0KDQorIGxvZyB0aGUgdmFyaWFibGVzIHdpdGggbGFyZ2Ugc2NhbGUuDQoNCiMjIyMjIDcuMS41IOWbnuatuOi2qOWLoue3mg0KYGBge3J9DQojIFNpbXBsZSBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbCB0byBwcmVkaWN0IHRoZSBwZXJjZW50YWdlIG9mIHRoZSANCiMgcG9wdWxhdGlvbiB1bmRlciAxNSwgdXNpbmcgdGhlIGxvZyBvZiB0aGUgZmVydGlsaXR5IHJhdGU6DQptb2QgPSBsbShVbmRlcjE1IH4gbG9nKEZlcnRpbGl0eVJhdGUpLCBkYXRhID0gV0hPKQ0Kc3VtbWFyeShtb2QpDQpgYGANCg0KYGBge3J9DQojIEFkZCB0aGlzIHJlZ3Jlc3Npb24gbGluZSB0byBvdXIgcGxvdDoNCmdncGxvdChXSE8sIGFlcyh4ID0gbG9nKEZlcnRpbGl0eVJhdGUpLCB5ID0gVW5kZXIxNSkpICsgDQogIGdlb21fcG9pbnQoKSArIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIpDQpgYGANCg0KKyBhZGQgdGhlIHJlZ3Jlc3Npb24gbGluZSB3aXRoIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIpDQoNCiMjIyMjIDcuMS42IOi2qOWLoue3mueahOS/oeiztOWNgOmWkw0KYGBge3J9DQojIDk5JSBjb25maWRlbmNlIGludGVydmFsDQpnZ3Bsb3QoV0hPLCBhZXMoeCA9IGxvZyhGZXJ0aWxpdHlSYXRlKSwgeSA9IFVuZGVyMTUpKSArIA0KICBnZW9tX3BvaW50KCkgKyBzdGF0X3Ntb290aChtZXRob2QgPSAibG0iLCBsZXZlbCA9IDAuOTkpDQpgYGANCg0KK2FkZCB0aGUgY29uZmlkZW5jZSBsZXZlbCB3aXRoIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIsIGxldmVsID0gMC45OSkNCg0KYGBge3J9DQojIE5vIGNvbmZpZGVuY2UgaW50ZXJ2YWwgaW4gdGhlIHBsb3QNCmdncGxvdChXSE8sIGFlcyh4ID0gbG9nKEZlcnRpbGl0eVJhdGUpLCB5ID0gVW5kZXIxNSkpICsgDQogIGdlb21fcG9pbnQoKSArIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIsIHNlID0gRkFMU0UpDQpgYGANCg0KYGBge3J9DQojIENoYW5nZSB0aGUgY29sb3Igb2YgdGhlIHJlZ3Jlc3Npb24gbGluZToNCmdncGxvdChXSE8sIGFlcyh4ID0gbG9nKEZlcnRpbGl0eVJhdGUpLCB5ID0gVW5kZXIxNSkpICsgDQogIGdlb21fcG9pbnQoKSArIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIsIGNvbG91ciA9ICJvcmFuZ2UiKQ0KYGBgDQoNCiMjIyMjIDcuMS43IOWIhue+pOm7nueLgOWclg0KYGBge3J9DQojIHF1aXotMToNCmdncGxvdChXSE8sIGFlcyh4ID0gRmVydGlsaXR5UmF0ZSwgeSA9IFVuZGVyMTUsIGNvbD1SZWdpb24pKSArIA0KICBzY2FsZV9jb2xvcl9icmV3ZXIocGFsZXR0ZT0iQWNjZW50IikgKw0KICBnZW9tX3BvaW50KCkNCmBgYA0KDQojIyMjIyA3LjEuOCDliIbmoLzpu57ni4DlnJYNCmBgYHtyfQ0KIyBxdWl6LTE6DQpnZ3Bsb3QoV0hPLCBhZXMoeCA9IGxvZyhQb3B1bGF0aW9uKSwgeSA9IEdOSSwgY29s
b3I9UmVnaW9uKSkgKyANCiAgZ2VvbV9wb2ludCgpICsgDQogIHN0YXRfc21vb3RoKG1ldGhvZD0nbG0nKSArDQogIGZhY2V0X3dyYXAoflJlZ2lvbikgKyB0aGVtZV9idygpDQpgYGANCg0KKyBwbG90IDYgZ3JhZ2ggb2YgNiBjbHVzdGVycy4NCg0KPGJyPg0KDQotIC0gLQ0KDQo8YnI+PGJyPjxicj48YnI+PGJyPg0KDQo8c3R5bGU+DQouY2FwdGlvbiB7DQogIGNvbG9yOiAjNzc3Ow0KICBtYXJnaW4tdG9wOiAxMHB4Ow0KfQ0KcCBjb2RlIHsNCiAgd2hpdGUtc3BhY2U6IGluaGVyaXQ7DQp9DQpwcmUgew0KICB3b3JkLWJyZWFrOiBub3JtYWw7DQogIHdvcmQtd3JhcDogbm9ybWFsOw0KICBsaW5lLWhlaWdodDogMTsNCn0NCnByZSBjb2RlIHsNCiAgd2hpdGUtc3BhY2U6IGluaGVyaXQ7DQp9DQpwLGxpIHsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCi5yew0KICBsaW5lLWhlaWdodDogMS4yOw0KfQ0KDQp0aXRsZXsNCiAgY29sb3I6ICNjYzAwMDA7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpib2R5ew0KICBmb250LWZhbWlseTogIlRyZWJ1Y2hldCBNUyIsICLlvq7ou5/mraPpu5Hpq5QiLCAiTWljcm9zb2Z0IEpoZW5nSGVpIjsNCn0NCg0KaDEsaDIsaDMsaDQsaDV7DQogIGNvbG9yOiAjMDA4ODAwOw0KICBmb250LWZhbWlseTogIlRyZWJ1Y2hldCBNUyIsICLlvq7ou5/mraPpu5Hpq5QiLCAiTWljcm9zb2Z0IEpoZW5nSGVpIjsNCn0NCg0KaDN7DQogIGNvbG9yOiAjYjM2YjAwOw0KICBiYWNrZ3JvdW5kOiAjZmZlMGIzOw0KICBsaW5lLWhlaWdodDogMjsNCiAgZm9udC13ZWlnaHQ6IGJvbGQ7DQp9DQoNCmg1ew0KICBjb2xvcjogIzAwNjAwMDsNCiAgYmFja2dyb3VuZDogI2ZmZmZlMDsNCiAgbGluZS1oZWlnaHQ6IDI7DQogIGZvbnQtd2VpZ2h0OiBib2xkOw0KfQ0KDQplbXsNCiAgY29sb3I6ICMwMDAwYzA7DQogIGJhY2tncm91bmQ6ICNmMGYwZjA7DQogIH0NCjwvc3R5bGU+DQoNCg==