# Switch every locale category of this session to the portable "C" locale.
Sys.setlocale(category = "LC_ALL", locale = "C")
[1] "C"
# Packages this notebook expects to be installed. Each name appears once
# (the original list repeated "Matrix").
packages <- c(
  "dplyr", "ggplot2", "d3heatmap", "googleVis", "devtools", "plotly",
  "xgboost", "magrittr", "caTools", "ROCR", "corrplot", "rpart",
  "rpart.plot", "doParallel", "caret", "glmnet", "Matrix", "e1071",
  "randomForest", "flexclust", "FactoMineR", "factoextra", "maps", "ggmap",
  "igraph", "rgl", "tm", "SnowballC", "wordcloud", "slam", "RColorBrewer"
)

# Names of the packages already installed, taken from the "Package" column
# by name rather than by position.
existing <- unname(installed.packages()[, "Package"])

# Install only what is missing. setdiff() also removes repeated names, so no
# package is requested more than once.
for (pkg in setdiff(packages, existing)) {
  install.packages(pkg)
}
# Clear the global workspace, including hidden objects whose names start
# with a dot. The argument is spelled out as `all.names = TRUE`: the short
# form `all = T` depends on partial argument matching and on `T`, which is an
# ordinary variable that can be reassigned.
rm(list = ls(all.names = TRUE))

# Print numbers with 4 significant digits and bias strongly towards fixed
# (non-scientific) notation.
options(digits = 4, scipen = 12)

# Packages attached for the rest of the notebook.
library(dplyr)
library(ggplot2)
library(maps)
library(ggmap)
7.1 ggplot2 繪圖套件
7.1.1 基本點狀圖
# Read data/WHO.csv into a data frame.
WHO <- read.csv(file = "data/WHO.csv")

# Base-graphics scatter plot. When both variables are continuous, supplying
# x and y is enough: plot() looks at the objects it receives and decides what
# kind of figure to draw.
plot(x = WHO$GNI, y = WHO$FertilityRate)

library(ggplot2)

# ggplot2 turns a data frame into a plot object. A plot needs three things:
# the data, an aesthetic mapping, and the geometric objects that determine how
# the data values are rendered. Only the first two are bound here, so the
# object can be reused below with different geometries.
scatterplot <- ggplot(data = WHO, mapping = aes(x = GNI, y = FertilityRate))

# Draw the observations as points. The axis labels show just the variable
# names, without the data set name and a dollar sign in front.
scatterplot + geom_point()

# Draw the same mapping as a line graph instead.
scatterplot + geom_line()

# Return to the point geometry.
scatterplot + geom_point()

# Redo the plot with blue triangles instead of circles.
# Shape 17 is the filled triangle. The earlier value, 21, is a circle and so
# contradicted the stated intent.
scatterplot + geom_point(color = "blue", size = 3, shape = 17)

# Alternative styling: dark-red markers drawn with point shape 8.
scatterplot +
  geom_point(shape = 8, size = 3, colour = "darkred")

# Blue markers (shape 17) with a title above the plot.
scatterplot +
  ggtitle("Fertility Rate vs. Gross National Income") +
  geom_point(shape = 17, size = 3, colour = "blue")

7.1.2 儲存圖檔
# Keep the titled scatter plot in a variable so it can be written to disk.
fertilityGNIplot <- scatterplot +
  ggtitle("Fertility Rate vs. Gross National Income") +
  geom_point(shape = 17, size = 3, colour = "blue")

# Open a PDF graphics device, draw the stored plot into it, then close the
# device so the file is finalized.
pdf(file = "MyPlot.pdf")
print(fertilityGNIplot)
dev.off()
null device
1
7.1.3 圖形元件屬性
# Colour the points by Region. Region is categorical, so each category gets
# its own colour.
ggplot(
  data = WHO,
  mapping = aes(x = GNI, y = FertilityRate, colour = Region)
) +
  geom_point()

# Colour the points by life expectancy instead. Lighter blue corresponds to a
# higher life expectancy and darker blue to a lower one.
ggplot(
  data = WHO,
  mapping = aes(x = GNI, y = FertilityRate, colour = LifeExpectancy)
) +
  geom_point()

# Is a country's fertility rate a good predictor of the percentage of its
# population under 15?
ggplot(data = WHO, mapping = aes(x = FertilityRate, y = Under15)) +
  geom_point()

7.1.4 數值尺度比例轉換
# Same relationship with the fertility rate on a natural-log scale.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point()

7.1.5 回歸趨勢線
# Fit a simple linear regression that predicts the percentage of the
# population under 15 from the log of the fertility rate, then print the
# fitted model's summary.
mod <- lm(formula = Under15 ~ log(FertilityRate), data = WHO)
summary(mod)
Call:
lm(formula = Under15 ~ log(FertilityRate), data = WHO)
Residuals:
Min 1Q Median 3Q Max
-10.313 -1.774 0.045 1.744 7.717
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.654 0.448 17.1 <2e-16 ***
log(FertilityRate) 22.055 0.418 52.8 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.65 on 181 degrees of freedom
(11 observations deleted due to missingness)
Multiple R-squared: 0.939, Adjusted R-squared: 0.939
F-statistic: 2.79e+03 on 1 and 181 DF, p-value: <2e-16
# Overlay the linear-regression trend line on the scatter plot;
# stat_smooth(method = "lm") adds the fitted line.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm")

7.1.6 趨勢線的信賴區間
# Trend line with a 99% confidence interval. The grey shaded band is the
# interval for the mean response, not for individual points, so decisions
# about single observations should not be based on it.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", level = 0.99)

# Trend line without any confidence band.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)

# Trend line drawn in orange.
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm", colour = "orange")

7.1.7 分群點狀圖
# Quiz 1: colour the fertility-rate / under-15 scatter by Region with the
# "Accent" Brewer palette, to see whether the relationship between the points
# differs by category.
ggplot(
  data = WHO,
  mapping = aes(x = FertilityRate, y = Under15, color = Region)
) +
  geom_point() +
  scale_color_brewer(palette = "Accent")

7.1.8 分格點狀圖
# Quiz 1: GNI against log(Population), one panel per Region, each with its
# own linear trend line, drawn with the black-and-white theme.
ggplot(
  data = WHO,
  mapping = aes(x = log(Population), y = GNI, color = Region)
) +
  geom_point() +
  stat_smooth(method = "lm") +
  theme_bw() +
  facet_wrap(~Region)

LS0tDQp0aXRsZTogIkFTNy0wQSBnZ3Bsb3QyIOe5quWcluWll+S7tiINCmF1dGhvcjogIuS9leiqnuWplSBNMDY0MDIwMDQwLCAyMDE4LzA3LzMwIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KPGJyPg0KDQpgYGB7cn0NClN5cy5zZXRsb2NhbGUoIkxDX0FMTCIsIkMiKQ0KcGFja2FnZXMgPSBjKA0KICAiZHBseXIiLCJnZ3Bsb3QyIiwiZDNoZWF0bWFwIiwiZ29vZ2xlVmlzIiwiZGV2dG9vbHMiLCJwbG90bHkiLCAieGdib29zdCIsDQogICJtYWdyaXR0ciIsImNhVG9vbHMiLCJST0NSIiwiY29ycnBsb3QiLCAicnBhcnQiLCAicnBhcnQucGxvdCIsDQogICJkb1BhcmFsbGVsIiwgImNhcmV0IiwgImdsbW5ldCIsICJNYXRyaXgiLCAiZTEwNzEiLCAicmFuZG9tRm9yZXN0IiwNCiAgImZsZXhjbHVzdCIsICJGYWN0b01pbmVSIiwgImZhY3RvZXh0cmEiLCAibWFwcyIsICJnZ21hcCIsICJpZ3JhcGgiLCAicmdsIiwNCiAgInRtIiwgIlNub3diYWxsQyIsICJ3b3JkY2xvdWQiLCAic2xhbSIsICJNYXRyaXgiLCAiUkNvbG9yQnJld2VyIg0KICApDQpleGlzdGluZyA9IGFzLmNoYXJhY3RlcihpbnN0YWxsZWQucGFja2FnZXMoKVssMV0pDQpmb3IocGtnIGluIHBhY2thZ2VzWyEocGFja2FnZXMgJWluJSBleGlzdGluZyldKSBpbnN0YWxsLnBhY2thZ2VzKHBrZykNCmBgYA0KDQpgYGB7ciBlY2hvPVQsIG1lc3NhZ2U9RiwgY2FjaGU9Riwgd2FybmluZz1GfQ0Kcm0obGlzdD1scyhhbGw9VCkpDQpvcHRpb25zKGRpZ2l0cz00LCBzY2lwZW49MTIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShtYXBzKQ0KbGlicmFyeShnZ21hcCkNCmBgYA0KDQotIC0gLQ0KDQojIyMgNy4xIGBnZ3Bsb3QyYCDnuarlnJblpZfku7YNCg0KIyMjIyMgNy4xLjEg5Z+65pys6bue54uA5ZyWDQpgYGB7cn0NCldITyA9IHJlYWQuY3N2KCJkYXRhL1dITy5jc3YiKQ0KYGBgDQoNCmBgYHtyfQ0KIyBCYXNpYyBQbG90IGluIFIgDQpwbG90KFdITyRHTkksIFdITyRGZXJ0aWxpdHlSYXRlKSAgI+mDveaYr+mAo+e6jOiuiuaVuOeahOipse+8jOioreWumnh55bCx5Y+v55Wr5Ye65ZyWICNS5pyD55yL6JGX6YCZ5piv55Sa6bq854mp5Lu277yM55Wr5Ye655Sa6bq85ZyWDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQojIENyZWF0ZSB0aGUgZ2dwbG90IG9iamVjdCB3aXRoIHRoZSBkYXRhIGFuZCB0aGUgYWVzdGhldGljIG1hcHBpbmc6DQpzY2F0dGVycGxvdCA9IGdncGxvdChXSE8sIGFlcyh4ID0gR05JLCB5ID0gRmVydGlsaXR5UmF0ZSkpICAjZ2dwbG90MuS4reacg+Wwh+S4gOWAi+izh+aWmeahhu+8jOi9ieaIkOe5quWclueJqeS7tiANCiNkYXRh44CBYWVzdGhldGljIG1hcHBpbmfjgIF3aGljaCBnZW9tZXRyaWMgb2JqZWN0cyB3ZSB3YW50IHRvIGRldGVybWluZSBob3cgdGhlIGRhdGEgdmFsdWVzIGFyZSByZW5kZXJlZCBncmFwaGljYWxseQ0KYGBgDQoNCmBgYHtyfQ0KIyBBZGQgdGhlIGdlb21fcG9pbnQgZ2VvbWV0
cnkNCnNjYXR0ZXJwbG90ICsgZ2VvbV9wb2ludCgpICAj55Wr6bue5ZyWICNkb24ndCBoYXZlIHRoZSBkYXRhIHNldCBuYW1lIHdpdGggYSBkb2xsYXIgc2lnbiBpbiBmcm9udCBvZiB0aGUgbGFiZWwgb24gZWFjaCBheGlzLCBqdXN0IHRoZSB2YXJpYWJsZSBuYW1lDQpgYGANCg0KYGBge3J9DQojIE1ha2UgYSBsaW5lIGdyYXBoIGluc3RlYWQ6DQpzY2F0dGVycGxvdCArIGdlb21fbGluZSgpICAj55Wr57ea5ZyWDQpgYGANCg0KYGBge3J9DQojIFN3aXRjaCBiYWNrIHRvIG91ciBwb2ludHM6DQpzY2F0dGVycGxvdCArIGdlb21fcG9pbnQoKQ0KYGBgDQoNCmBgYHtyfQ0KIyBSZWRvIHRoZSBwbG90IHdpdGggYmx1ZSB0cmlhbmdsZXMgaW5zdGVhZCBvZiBjaXJjbGVzOg0Kc2NhdHRlcnBsb3QgKyBnZW9tX3BvaW50KGNvbG9yID0gImJsdWUiLCBzaXplID0gMywgc2hhcGUgPSAyMSkNCmBgYA0KDQpgYGB7cn0NCiMgQW5vdGhlciBvcHRpb246DQpzY2F0dGVycGxvdCArIGdlb21fcG9pbnQoY29sb3IgPSAiZGFya3JlZCIsIHNpemUgPSAzLCBzaGFwZSA9IDgpDQpgYGANCg0KYGBge3J9DQojIEFkZCBhIHRpdGxlIHRvIHRoZSBwbG90Og0Kc2NhdHRlcnBsb3QgKyANCiAgZ2VvbV9wb2ludChjb2xvdXIgPSAiYmx1ZSIsIHNpemUgPSAzLCBzaGFwZSA9IDE3KSArIA0KICBnZ3RpdGxlKCJGZXJ0aWxpdHkgUmF0ZSB2cy4gR3Jvc3MgTmF0aW9uYWwgSW5jb21lIikNCmBgYA0KDQoNCiMjIyMjIDcuMS4yIOWEsuWtmOWcluaqlA0KYGBge3J9DQojIFNhdmUgb3VyIHBsb3Q6DQpmZXJ0aWxpdHlHTklwbG90ID0gc2NhdHRlcnBsb3QgKyANCiAgZ2VvbV9wb2ludChjb2xvdXIgPSAiYmx1ZSIsIHNpemUgPSAzLCBzaGFwZSA9IDE3KSArIA0KICBnZ3RpdGxlKCJGZXJ0aWxpdHkgUmF0ZSB2cy4gR3Jvc3MgTmF0aW9uYWwgSW5jb21lIikNCg0KcGRmKCJNeVBsb3QucGRmIikNCnByaW50KGZlcnRpbGl0eUdOSXBsb3QpDQpkZXYub2ZmKCkNCg0KYGBgDQoNCiMjIyMjIDcuMS4zIOWcluW9ouWFg+S7tuWxrOaApw0KYGBge3J9DQojIENvbG9yIHRoZSBwb2ludHMgYnkgcmVnaW9uOg0KZ2dwbG90KFdITywgYWVzKHggPSBHTkksIHkgPSBGZXJ0aWxpdHlSYXRlLCBjb2xvciA9IFJlZ2lvbikpICsgICNSZWdpb27mmK/kuIDlgIvpoZ7liKXvvIzmiYDku6XkuI3lkIzpoY/oibLntabkuIDlgIvpoZ7liKUNCiAgZ2VvbV9wb2ludCgpDQpgYGANCg0KYGBge3J9DQojIENvbG9yIHRoZSBwb2ludHMgYWNjb3JkaW5nIHRvIGxpZmUgZXhwZWN0YW5jeToNCmdncGxvdChXSE8sIGFlcyh4ID0gR05JLCB5ID0gRmVydGlsaXR5UmF0ZSwgY29sb3IgPSBMaWZlRXhwZWN0YW5jeSkpICsgDQogIGdlb21fcG9pbnQoKQ0KI0xpZ2h0ZXIgYmx1ZSBjb3JyZXNwb25kcyB0byBhIGhpZ2hlciBsaWZlIGV4cGVjdGFuY3ksYW5kIGRhcmtlciBibHVlIGNvcnJlc3BvbmRzIHRvIGEgbG93ZXIgbGlmZSBleHBlY3RhbmN5Lg0KYGBgDQoNCmBgYHtyfQ0KIyBJcyB0aGUgZmVydGlsaXR5IHJh
dGUgb2YgYSBjb3VudHJ5IHdhcyBhIGdvb2QgcHJlZGljdG9yIG9mIHRoZSANCiMgcGVyY2VudGFnZSBvZiB0aGUgcG9wdWxhdGlvbiB1bmRlciAxNT8NCmdncGxvdChXSE8sIGFlcyh4ID0gRmVydGlsaXR5UmF0ZSwgeSA9IFVuZGVyMTUpKSArIGdlb21fcG9pbnQoKQ0KYGBgDQoNCiMjIyMjIDcuMS40IOaVuOWAvOWwuuW6puavlOS+i+i9ieaPmw0KYGBge3J9DQojIExldCdzIHRyeSBhIGxvZyB0cmFuc2Zvcm1hdGlvbjoNCmdncGxvdChXSE8sIGFlcyh4ID0gbG9nKEZlcnRpbGl0eVJhdGUpLCB5ID0gVW5kZXIxNSkpICsgZ2VvbV9wb2ludCgpDQpgYGANCg0KDQojIyMjIyA3LjEuNSDlm57mrbjotqjli6Lnt5oNCmBgYHtyfQ0KIyBTaW1wbGUgbGluZWFyIHJlZ3Jlc3Npb24gbW9kZWwgdG8gcHJlZGljdCB0aGUgcGVyY2VudGFnZSBvZiB0aGUgDQojIHBvcHVsYXRpb24gdW5kZXIgMTUsIHVzaW5nIHRoZSBsb2cgb2YgdGhlIGZlcnRpbGl0eSByYXRlOg0KbW9kID0gbG0oVW5kZXIxNSB+IGxvZyhGZXJ0aWxpdHlSYXRlKSwgZGF0YSA9IFdITykNCnN1bW1hcnkobW9kKQ0KYGBgDQoNCmBgYHtyfQ0KIyBBZGQgdGhpcyByZWdyZXNzaW9uIGxpbmUgdG8gb3VyIHBsb3Q6DQpnZ3Bsb3QoV0hPLCBhZXMoeCA9IGxvZyhGZXJ0aWxpdHlSYXRlKSwgeSA9IFVuZGVyMTUpKSArIA0KICBnZW9tX3BvaW50KCkgKyBzdGF0X3Ntb290aChtZXRob2QgPSAibG0iKSAgI+WKoOS4gOWAi+i2qOWLoue3mnN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIpDQpgYGANCg0KIyMjIyMgNy4xLjYg6Lao5Yui57ea55qE5L+h6LO05Y2A6ZaTDQpgYGB7cn0NCiMgOTklIGNvbmZpZGVuY2UgaW50ZXJ2YWwNCmdncGxvdChXSE8sIGFlcyh4ID0gbG9nKEZlcnRpbGl0eVJhdGUpLCB5ID0gVW5kZXIxNSkpICsgDQogIGdlb21fcG9pbnQoKSArIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIsIGxldmVsID0gMC45OSkgICMwLjk55piv5L+h6LO05Y2A6ZaTKOW5s+Wdh+WAvOeahO+8jOS4jeaYr+m7nueahCnvvIzngbDoibLpmbDlvbHnmoTpg6jliIYNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgI+S4jeiDveeUqOW5s+Wdh+WAvOWBmuaxuuWumg0KYGBgDQoNCmBgYHtyfQ0KIyBObyBjb25maWRlbmNlIGludGVydmFsIGluIHRoZSBwbG90DQpnZ3Bsb3QoV0hPLCBhZXMoeCA9IGxvZyhGZXJ0aWxpdHlSYXRlKSwgeSA9IFVuZGVyMTUpKSArIA0KICBnZW9tX3BvaW50KCkgKyBzdGF0X3Ntb290aChtZXRob2QgPSAibG0iLCBzZSA9IEZBTFNFKQ0KYGBgDQoNCmBgYHtyfQ0KIyBDaGFuZ2UgdGhlIGNvbG9yIG9mIHRoZSByZWdyZXNzaW9uIGxpbmU6DQpnZ3Bsb3QoV0hPLCBhZXMoeCA9IGxvZyhGZXJ0aWxpdHlSYXRlKSwgeSA9IFVuZGVyMTUpKSArIA0KICBnZW9tX3BvaW50KCkgKyBzdGF0X3Ntb290aChtZXRob2QgPSAibG0iLCBjb2xvdXIgPSAib3JhbmdlIikNCmBgYA0KDQojIyMjIyA3LjEuNyDliIbnvqTpu57ni4Dl
nJYNCmBgYHtyfQ0KIyBxdWl6LTE6DQpnZ3Bsb3QoV0hPLCBhZXMoeCA9IEZlcnRpbGl0eVJhdGUsIHkgPSBVbmRlcjE1LCBjb2w9UmVnaW9uKSkgKyANCiAgc2NhbGVfY29sb3JfYnJld2VyKHBhbGV0dGU9IkFjY2VudCIpICsgICAj6bue6IiH6bue5LmL6ZaT55qE6Zec5L+C44CB5piv5ZCm5Zug5LuA6bq86aGe5Yil6ICM5pyJ5omA5LiN5ZCMDQogIGdlb21fcG9pbnQoKQ0KYGBgDQoNCiMjIyMjIDcuMS44IOWIhuagvOm7nueLgOWclg0KYGBge3J9DQojIHF1aXotMToNCmdncGxvdChXSE8sIGFlcyh4ID0gbG9nKFBvcHVsYXRpb24pLCB5ID0gR05JLCBjb2xvcj1SZWdpb24pKSArIA0KICBnZW9tX3BvaW50KCkgKyANCiAgc3RhdF9zbW9vdGgobWV0aG9kPSdsbScpICsNCiAgZmFjZXRfd3JhcCh+UmVnaW9uKSArIHRoZW1lX2J3KCkNCmBgYA0KDQo8YnI+DQoNCi0gLSAtDQoNCjxicj48YnI+PGJyPjxicj48YnI+DQoNCjxzdHlsZT4NCi5jYXB0aW9uIHsNCiAgY29sb3I6ICM3Nzc7DQogIG1hcmdpbi10b3A6IDEwcHg7DQp9DQpwIGNvZGUgew0KICB3aGl0ZS1zcGFjZTogaW5oZXJpdDsNCn0NCnByZSB7DQogIHdvcmQtYnJlYWs6IG5vcm1hbDsNCiAgd29yZC13cmFwOiBub3JtYWw7DQogIGxpbmUtaGVpZ2h0OiAxOw0KfQ0KcHJlIGNvZGUgew0KICB3aGl0ZS1zcGFjZTogaW5oZXJpdDsNCn0NCnAsbGkgew0KICBmb250LWZhbWlseTogIlRyZWJ1Y2hldCBNUyIsICLlvq7ou5/mraPpu5Hpq5QiLCAiTWljcm9zb2Z0IEpoZW5nSGVpIjsNCn0NCg0KLnJ7DQogIGxpbmUtaGVpZ2h0OiAxLjI7DQp9DQoNCnRpdGxlew0KICBjb2xvcjogI2NjMDAwMDsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmJvZHl7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoMSxoMixoMyxoNCxoNXsNCiAgY29sb3I6ICMwMDg4MDA7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoM3sNCiAgY29sb3I6ICNiMzZiMDA7DQogIGJhY2tncm91bmQ6ICNmZmUwYjM7DQogIGxpbmUtaGVpZ2h0OiAyOw0KICBmb250LXdlaWdodDogYm9sZDsNCn0NCg0KaDV7DQogIGNvbG9yOiAjMDA2MDAwOw0KICBiYWNrZ3JvdW5kOiAjZmZmZmUwOw0KICBsaW5lLWhlaWdodDogMjsNCiAgZm9udC13ZWlnaHQ6IGJvbGQ7DQp9DQoNCmVtew0KICBjb2xvcjogIzAwMDBjMDsNCiAgYmFja2dyb3VuZDogI2YwZjBmMDsNCiAgfQ0KPC9zdHlsZT4NCg0K