# Switch every locale category to "C" for this session.
# NOTE(review): the reason is not stated in this file; confirm it is still
# needed, since non-ASCII text may not display correctly under "C".
Sys.setlocale("LC_ALL", "C")

# Packages to make available on this machine. Only a few of them are
# attached later in this file.
packages <- c(
  "dplyr", "ggplot2", "d3heatmap", "googleVis", "devtools", "plotly",
  "xgboost", "magrittr", "caTools", "ROCR", "corrplot", "rpart",
  "rpart.plot", "doParallel", "caret", "glmnet", "Matrix", "e1071",
  "randomForest", "flexclust", "FactoMineR", "factoextra", "maps", "ggmap",
  "igraph", "rgl", "tm", "SnowballC", "wordcloud", "slam", "RColorBrewer"
)

# Install only what is missing. setdiff() also de-duplicates, so a name that
# appears more than once in `packages` is never installed twice.
missing_pkgs <- setdiff(packages, installed.packages()[, "Package"])
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Start from an empty workspace (hidden objects included). Spelled out as
# all.names = TRUE: `T` can be reassigned and `all` relied on partial
# argument matching.
rm(list = ls(all.names = TRUE))

# Print 4 significant digits and strongly prefer fixed over scientific
# notation.
options(digits = 4, scipen = 12)
library(dplyr)
library(ggplot2)
library(maps)
package 'maps' was built under R version 3.4.4
library(ggmap)
package 'ggmap' was built under R version 3.4.4
Google Maps API Terms of Service: http://developers.google.com/maps/terms.
Please cite ggmap if you use it: see citation('ggmap') for details.

7.1 ggplot2 繪圖套件

7.1.1 基本點狀圖
# Read the WHO data set from the project's data folder.
WHO <- read.csv(file = "data/WHO.csv")

# Base-graphics scatterplot of fertility rate against GNI, for comparison
# with the ggplot2 versions that follow.
plot(x = WHO$GNI, y = WHO$FertilityRate)

library(ggplot2)
# Plot object holding only the data and the aesthetic mapping (GNI on x,
# fertility rate on y); each statement below adds one geometry layer to it.
scatterplot <- ggplot(data = WHO, mapping = aes(x = GNI, y = FertilityRate))

# Points layer: the scatterplot itself.
scatterplot + geom_point()

# The same mapping drawn as a connected line instead.
scatterplot + geom_line()

# Back to points.
scatterplot + geom_point()

# Redo the plot with blue triangles instead of circles.
# Shape 17 is the filled triangle; shape 21 would still draw a circle.
scatterplot + geom_point(color = "blue", size = 3, shape = 17)

# Another option: dark red asterisks (shape 8).
scatterplot + geom_point(color = "darkred", size = 3, shape = 8)

# Add a title to the plot:
scatterplot +
  geom_point(colour = "blue", size = 3, shape = 17) +
  ggtitle("Fertility Rate vs. Gross National Income")
Warning message:
In scan(file = file, what = what, sep = sep, quote = quote, dec = dec,  :
  EOF within quoted string

7.1.2 儲存圖檔
# Keep the titled scatterplot in a variable so it can be written to a file.
fertilityGNIplot <- scatterplot +
  geom_point(color = "blue", size = 3, shape = 17) +
  ggtitle("Fertility Rate vs. Gross National Income")

# Open a PDF device, draw the plot into it, then close the device so the
# file is finalized. A stored ggplot object is only drawn when printed.
pdf(file = "MyPlot.pdf")
print(fertilityGNIplot)
dev.off()
null device 
          1 
7.1.3 圖形元件屬性
# Colour each point by the Region column.
ggplot(WHO, aes(GNI, FertilityRate, colour = Region)) +
  geom_point()

# Colour each point by the LifeExpectancy column instead.
ggplot(WHO, aes(GNI, FertilityRate, colour = LifeExpectancy)) +
  geom_point()

# Is a country's fertility rate a good predictor of the percentage of its
# population under 15?
ggplot(WHO, aes(FertilityRate, Under15)) +
  geom_point()

7.1.4 數值尺度比例轉換
# Same relationship with the fertility rate log-transformed; the
# transformation is applied directly inside aes().
ggplot(WHO, aes(log(FertilityRate), Under15)) +
  geom_point()

7.1.5 回歸趨勢線
# Fit a simple linear regression of Under15 (percentage of the population
# under 15) on the log of the fertility rate, then print the fit summary.
mod <- lm(formula = Under15 ~ log(FertilityRate), data = WHO)
summary(mod)

Call:
lm(formula = Under15 ~ log(FertilityRate), data = WHO)

Residuals:
    Min      1Q  Median      3Q     Max 
-10.313  -1.774   0.045   1.744   7.717 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)           7.654      0.448    17.1   <2e-16 ***
log(FertilityRate)   22.055      0.418    52.8   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.65 on 181 degrees of freedom
  (11 observations deleted due to missingness)
Multiple R-squared:  0.939, Adjusted R-squared:  0.939 
F-statistic: 2.79e+03 on 1 and 181 DF,  p-value: <2e-16

7.1.6 趨勢線的信賴區間
# Linear trend line with a 99% confidence band.
ggplot(WHO, aes(log(FertilityRate), Under15)) +
  geom_point() +
  stat_smooth(method = "lm", level = 0.99)

# Linear trend line only, with the confidence band switched off.
ggplot(WHO, aes(log(FertilityRate), Under15)) +
  geom_point() +
  stat_smooth(method = "lm", se = FALSE)

# Linear trend line drawn in orange.
ggplot(WHO, aes(log(FertilityRate), Under15)) +
  geom_point() +
  stat_smooth(method = "lm", color = "orange")

7.1.7 分群點狀圖
# quiz-1: points coloured by Region, using the ColorBrewer "Accent" palette.
ggplot(WHO, aes(FertilityRate, Under15, colour = Region)) +
  geom_point() +
  scale_colour_brewer(palette = "Accent")

7.1.8 分格點狀圖
# quiz-1: GNI against log(Population), one panel per Region, each with its
# own linear trend line, on the black-and-white theme.
ggplot(WHO, aes(log(Population), GNI, colour = Region)) +
  geom_point() +
  stat_smooth(method = "lm") +
  facet_wrap(~ Region) +
  theme_bw()








LS0tDQp0aXRsZTogIkFTNy0wQSBnZ3Bsb3QyIOe5quWcluWll+S7tiINCmF1dGhvcjogIuWKieiCsumKmCAgIE0wNjQwMjAwMjUgLCAyMDE4LzA3LzIyIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KPGJyPg0KDQpgYGB7cn0NClN5cy5zZXRsb2NhbGUoIkxDX0FMTCIsIkMiKQ0KcGFja2FnZXMgPSBjKA0KICAiZHBseXIiLCJnZ3Bsb3QyIiwiZDNoZWF0bWFwIiwiZ29vZ2xlVmlzIiwiZGV2dG9vbHMiLCJwbG90bHkiLCAieGdib29zdCIsDQogICJtYWdyaXR0ciIsImNhVG9vbHMiLCJST0NSIiwiY29ycnBsb3QiLCAicnBhcnQiLCAicnBhcnQucGxvdCIsDQogICJkb1BhcmFsbGVsIiwgImNhcmV0IiwgImdsbW5ldCIsICJNYXRyaXgiLCAiZTEwNzEiLCAicmFuZG9tRm9yZXN0IiwNCiAgImZsZXhjbHVzdCIsICJGYWN0b01pbmVSIiwgImZhY3RvZXh0cmEiLCAibWFwcyIsICJnZ21hcCIsICJpZ3JhcGgiLCAicmdsIiwNCiAgInRtIiwgIlNub3diYWxsQyIsICJ3b3JkY2xvdWQiLCAic2xhbSIsICJNYXRyaXgiLCAiUkNvbG9yQnJld2VyIg0KICApDQpleGlzdGluZyA9IGFzLmNoYXJhY3RlcihpbnN0YWxsZWQucGFja2FnZXMoKVssMV0pDQpmb3IocGtnIGluIHBhY2thZ2VzWyEocGFja2FnZXMgJWluJSBleGlzdGluZyldKSBpbnN0YWxsLnBhY2thZ2VzKHBrZykNCmBgYA0KDQpgYGB7ciBlY2hvPVQsIG1lc3NhZ2U9RiwgY2FjaGU9Riwgd2FybmluZz1GfQ0Kcm0obGlzdD1scyhhbGw9VCkpDQpvcHRpb25zKGRpZ2l0cz00LCBzY2lwZW49MTIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShtYXBzKQ0KbGlicmFyeShnZ21hcCkNCmBgYA0KDQotIC0gLQ0KDQojIyMgNy4xIGBnZ3Bsb3QyYCDnuarlnJblpZfku7YNCg0KIyMjIyMgNy4xLjEg5Z+65pys6bue54uA5ZyWDQpgYGB7cn0NCldITyA9IHJlYWQuY3N2KCJkYXRhL1dITy5jc3YiKQ0KYGBgDQoNCmBgYHtyfQ0KIyBCYXNpYyBQbG90IGluIFIgDQpwbG90KFdITyRHTkksIFdITyRGZXJ0aWxpdHlSYXRlKQ0KYGBgDQoNCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KIyBDcmVhdGUgdGhlIGdncGxvdCBvYmplY3Qgd2l0aCB0aGUgZGF0YSBhbmQgdGhlIGFlc3RoZXRpYyBtYXBwaW5nOg0Kc2NhdHRlcnBsb3QgPSBnZ3Bsb3QoV0hPLCBhZXMoeCA9IEdOSSwgeSA9IEZlcnRpbGl0eVJhdGUpKQ0KYGBgDQoNCmBgYHtyfQ0KIyBBZGQgdGhlIGdlb21fcG9pbnQgZ2VvbWV0cnkNCnNjYXR0ZXJwbG90ICsgZ2VvbV9wb2ludCgpIA0KYGBgDQoNCmBgYHtyfQ0KIyBNYWtlIGEgbGluZSBncmFwaCBpbnN0ZWFkOg0Kc2NhdHRlcnBsb3QgKyBnZW9tX2xpbmUoKQ0KYGBgDQoNCmBgYHtyfQ0KIyBTd2l0Y2ggYmFjayB0byBvdXIgcG9pbnRzOg0Kc2NhdHRlcnBsb3QgKyBnZW9tX3BvaW50KCkNCmBgYA0KDQpgYGB7cn0NCiMgUmVkbyB0aGUgcGxvdCB3aXRoIGJsdWUgdHJpYW5nbGVzIGluc3RlYWQgb2YgY2lyY2xlczoNCnNjYXR0ZXJwbG90ICsgZ2VvbV9wb2lu
dChjb2xvciA9ICJibHVlIiwgc2l6ZSA9IDMsIHNoYXBlID0gMjEpDQpgYGANCg0KYGBge3J9DQojIEFub3RoZXIgb3B0aW9uOg0Kc2NhdHRlcnBsb3QgKyBnZW9tX3BvaW50KGNvbG9yID0gImRhcmtyZWQiLCBzaXplID0gMywgc2hhcGUgPSA4KQ0KYGBgDQoNCmBgYHtyfQ0KIyBBZGQgYSB0aXRsZSB0byB0aGUgcGxvdDoNCnNjYXR0ZXJwbG90ICsgDQogIGdlb21fcG9pbnQoY29sb3VyID0gImJsdWUiLCBzaXplID0gMywgc2hhcGUgPSAxNykgKyANCiAgZ2d0aXRsZSgiRmVydGlsaXR5IFJhdGUgdnMuIEdyb3NzIE5hdGlvbmFsIEluY29tZSIpDQpgYGANCg0KDQojIyMjIyA3LjEuMiDlhLLlrZjlnJbmqpQNCmBgYHtyfQ0KIyBTYXZlIG91ciBwbG90Og0KZmVydGlsaXR5R05JcGxvdCA9IHNjYXR0ZXJwbG90ICsgDQogIGdlb21fcG9pbnQoY29sb3VyID0gImJsdWUiLCBzaXplID0gMywgc2hhcGUgPSAxNykgKyANCiAgZ2d0aXRsZSgiRmVydGlsaXR5IFJhdGUgdnMuIEdyb3NzIE5hdGlvbmFsIEluY29tZSIpDQoNCnBkZigiTXlQbG90LnBkZiIpDQpwcmludChmZXJ0aWxpdHlHTklwbG90KQ0KZGV2Lm9mZigpDQoNCmBgYA0KDQojIyMjIyA3LjEuMyDlnJblvaLlhYPku7blsazmgKcNCmBgYHtyfQ0KIyBDb2xvciB0aGUgcG9pbnRzIGJ5IHJlZ2lvbjoNCmdncGxvdChXSE8sIGFlcyh4ID0gR05JLCB5ID0gRmVydGlsaXR5UmF0ZSwgY29sb3IgPSBSZWdpb24pKSArIA0KICBnZW9tX3BvaW50KCkNCmBgYA0KDQpgYGB7cn0NCiMgQ29sb3IgdGhlIHBvaW50cyBhY2NvcmRpbmcgdG8gbGlmZSBleHBlY3RhbmN5Og0KZ2dwbG90KFdITywgYWVzKHggPSBHTkksIHkgPSBGZXJ0aWxpdHlSYXRlLCBjb2xvciA9IExpZmVFeHBlY3RhbmN5KSkgKyANCiAgZ2VvbV9wb2ludCgpDQpgYGANCg0KYGBge3J9DQojIElzIHRoZSBmZXJ0aWxpdHkgcmF0ZSBvZiBhIGNvdW50cnkgd2FzIGEgZ29vZCBwcmVkaWN0b3Igb2YgdGhlIA0KIyBwZXJjZW50YWdlIG9mIHRoZSBwb3B1bGF0aW9uIHVuZGVyIDE1Pw0KZ2dwbG90KFdITywgYWVzKHggPSBGZXJ0aWxpdHlSYXRlLCB5ID0gVW5kZXIxNSkpICsgZ2VvbV9wb2ludCgpDQpgYGANCg0KIyMjIyMgNy4xLjQg5pW45YC85bC65bqm5q+U5L6L6L2J5o+bDQpgYGB7cn0NCiMgTGV0J3MgdHJ5IGEgbG9nIHRyYW5zZm9ybWF0aW9uOg0KZ2dwbG90KFdITywgYWVzKHggPSBsb2coRmVydGlsaXR5UmF0ZSksIHkgPSBVbmRlcjE1KSkgKyBnZW9tX3BvaW50KCkNCmBgYA0KDQoNCiMjIyMjIDcuMS41IOWbnuatuOi2qOWLoue3mg0KYGBge3J9DQojIFNpbXBsZSBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbCB0byBwcmVkaWN0IHRoZSBwZXJjZW50YWdlIG9mIHRoZSANCiMgcG9wdWxhdGlvbiB1bmRlciAxNSwgdXNpbmcgdGhlIGxvZyBvZiB0aGUgZmVydGlsaXR5IHJhdGU6DQptb2QgPSBsbShVbmRlcjE1IH4gbG9nKEZlcnRpbGl0eVJhdGUpLCBkYXRhID0gV0hPKQ0Kc3VtbWFyeShtb2QpDQpgYGANCg0KYGBge3J9DQojIEFkZCB0
aGlzIHJlZ3Jlc3Npb24gbGluZSB0byBvdXIgcGxvdDoNCmdncGxvdChXSE8sIGFlcyh4ID0gbG9nKEZlcnRpbGl0eVJhdGUpLCB5ID0gVW5kZXIxNSkpICsgDQogIGdlb21fcG9pbnQoKSArIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIpDQpgYGANCg0KIyMjIyMgNy4xLjYg6Lao5Yui57ea55qE5L+h6LO05Y2A6ZaTDQpgYGB7cn0NCiMgOTklIGNvbmZpZGVuY2UgaW50ZXJ2YWwNCmdncGxvdChXSE8sIGFlcyh4ID0gbG9nKEZlcnRpbGl0eVJhdGUpLCB5ID0gVW5kZXIxNSkpICsgDQogIGdlb21fcG9pbnQoKSArIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIsIGxldmVsID0gMC45OSkNCmBgYA0KDQpgYGB7cn0NCiMgTm8gY29uZmlkZW5jZSBpbnRlcnZhbCBpbiB0aGUgcGxvdA0KZ2dwbG90KFdITywgYWVzKHggPSBsb2coRmVydGlsaXR5UmF0ZSksIHkgPSBVbmRlcjE1KSkgKyANCiAgZ2VvbV9wb2ludCgpICsgc3RhdF9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGQUxTRSkNCmBgYA0KDQpgYGB7cn0NCiMgQ2hhbmdlIHRoZSBjb2xvciBvZiB0aGUgcmVncmVzc2lvbiBsaW5lOg0KZ2dwbG90KFdITywgYWVzKHggPSBsb2coRmVydGlsaXR5UmF0ZSksIHkgPSBVbmRlcjE1KSkgKyANCiAgZ2VvbV9wb2ludCgpICsgc3RhdF9zbW9vdGgobWV0aG9kID0gImxtIiwgY29sb3VyID0gIm9yYW5nZSIpDQpgYGANCg0KIyMjIyMgNy4xLjcg5YiG576k6bue54uA5ZyWDQpgYGB7cn0NCiMgcXVpei0xOg0KZ2dwbG90KFdITywgYWVzKHggPSBGZXJ0aWxpdHlSYXRlLCB5ID0gVW5kZXIxNSwgY29sPVJlZ2lvbikpICsgDQogIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlPSJBY2NlbnQiKSArDQogIGdlb21fcG9pbnQoKQ0KYGBgDQoNCiMjIyMjIDcuMS44IOWIhuagvOm7nueLgOWclg0KYGBge3J9DQojIHF1aXotMToNCmdncGxvdChXSE8sIGFlcyh4ID0gbG9nKFBvcHVsYXRpb24pLCB5ID0gR05JLCBjb2xvcj1SZWdpb24pKSArIA0KICBnZW9tX3BvaW50KCkgKyANCiAgc3RhdF9zbW9vdGgobWV0aG9kPSdsbScpICsNCiAgZmFjZXRfd3JhcCh+UmVnaW9uKSArIHRoZW1lX2J3KCkNCmBgYA0KDQo8YnI+DQoNCi0gLSAtDQoNCjxicj48YnI+PGJyPjxicj48YnI+DQoNCjxzdHlsZT4NCi5jYXB0aW9uIHsNCiAgY29sb3I6ICM3Nzc7DQogIG1hcmdpbi10b3A6IDEwcHg7DQp9DQpwIGNvZGUgew0KICB3aGl0ZS1zcGFjZTogaW5oZXJpdDsNCn0NCnByZSB7DQogIHdvcmQtYnJlYWs6IG5vcm1hbDsNCiAgd29yZC13cmFwOiBub3JtYWw7DQogIGxpbmUtaGVpZ2h0OiAxOw0KfQ0KcHJlIGNvZGUgew0KICB3aGl0ZS1zcGFjZTogaW5oZXJpdDsNCn0NCnAsbGkgew0KICBmb250LWZhbWlseTogIlRyZWJ1Y2hldCBNUyIsICLlvq7ou5/mraPpu5Hpq5QiLCAiTWljcm9zb2Z0IEpoZW5nSGVpIjsNCn0NCg0KLnJ7DQogIGxpbmUtaGVpZ2h0OiAxLjI7DQp9DQoNCnRpdGxlew0KICBjb2xvcjogI2NjMDAwMDsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMi
LCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmJvZHl7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoMSxoMixoMyxoNCxoNXsNCiAgY29sb3I6ICMwMDg4MDA7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQpoM3sNCiAgY29sb3I6ICNiMzZiMDA7DQogIGJhY2tncm91bmQ6ICNmZmUwYjM7DQogIGxpbmUtaGVpZ2h0OiAyOw0KICBmb250LXdlaWdodDogYm9sZDsNCn0NCg0KaDV7DQogIGNvbG9yOiAjMDA2MDAwOw0KICBiYWNrZ3JvdW5kOiAjZmZmZmUwOw0KICBsaW5lLWhlaWdodDogMjsNCiAgZm9udC13ZWlnaHQ6IGJvbGQ7DQp9DQoNCmVtew0KICBjb2xvcjogIzAwMDBjMDsNCiAgYmFja2dyb3VuZDogI2YwZjBmMDsNCiAgfQ0KPC9zdHlsZT4NCg0K