Sys.setlocale("LC_ALL","C")
[1] "C"
packages = c(
"dplyr","ggplot2","d3heatmap","googleVis","devtools","plotly", "xgboost",
"magrittr","caTools","ROCR","corrplot", "rpart", "rpart.plot",
"doParallel", "caret", "glmnet", "Matrix", "e1071", "randomForest",
"flexclust", "FactoMineR", "factoextra", "maps", "ggmap", "igraph", "rgl",
"tm", "SnowballC", "wordcloud", "slam", "Matrix", "RColorBrewer"
)
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)
rm(list=ls(all=T))
options(digits=4, scipen=12)
library(dplyr)
library(ggplot2)
library(maps)
library(ggmap)
7.1 ggplot2 繪圖套件
7.1.1 基本點狀圖
WHO = read.csv("data/WHO.csv")
str(WHO)
'data.frame': 194 obs. of 13 variables:
$ Country : Factor w/ 194 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
$ Region : Factor w/ 6 levels "Africa","Americas",..: 3 4 1 4 1 2 2 4 6 4 ...
$ Population : int 29825 3162 38482 78 20821 89 41087 2969 23050 8464 ...
$ Under15 : num 47.4 21.3 27.4 15.2 47.6 ...
$ Over60 : num 3.82 14.93 7.17 22.86 3.84 ...
$ FertilityRate : num 5.4 1.75 2.83 NA 6.1 2.12 2.2 1.74 1.89 1.44 ...
$ LifeExpectancy : int 60 74 73 82 51 75 76 71 82 81 ...
$ ChildMortality : num 98.5 16.7 20 3.2 163.5 ...
$ CellularSubscribers : num 54.3 96.4 99 75.5 48.4 ...
$ LiteracyRate : num NA NA NA NA 70.1 99 97.8 99.6 NA NA ...
$ GNI : num 1140 8820 8310 NA 5230 ...
$ PrimarySchoolEnrollmentMale : num NA NA 98.2 78.4 93.1 91.1 NA NA 96.9 NA ...
$ PrimarySchoolEnrollmentFemale: num NA NA 96.4 79.4 78.2 84.5 NA NA 97.5 NA ...
# Basic Plot in R
plot(WHO$GNI, WHO$FertilityRate)

library(ggplot2)
# Create the ggplot object with the data and the aesthetic mapping:
#gglot2可用來繪製圖片
scatterplot = ggplot(WHO , aes(x = GNI , y = FertilityRate))
# Add the geom_point geometry
scatterplot + geom_point()

# Make a line graph instead:
scatterplot + geom_line()

# Switch back to our points:
scatterplot + geom_point()

# Redo the plot with blue triangles instead of circles:
scatterplot + geom_point(color = "blue", size = 3, shape = 21)

#增加顏色為藍色、大小設定、shape=17(三角形)
scatterplot + geom_point(color = "blue" , size = 3 , shape= 17)

# Another option:
scatterplot + geom_point(color = "darkred", size = 3, shape = 8)

# Add a title to the plot:
scatterplot +
geom_point(colour = "blue", size = 3, shape = 17) +
ggtitle("Fertility Rate vs. Gross National Income")

7.1.2 儲存圖檔
# Save our plot:
fertilityGNIplot = scatterplot + geom_point(colour = "blue", size = 3, shape = 17) + ggtitle("Fertility Rate vs. Gross National Income")
#fertilityGNIplot = scatterplot + geom_point(color = "darkred" , size = 3 , shape= 8) + ggtitle("Fertility Rate vs. Gross National Income")
pdf("MyPlot.pdf")
print(fertilityGNIplot)
dev.off()
null device
1
7.1.3 圖形元件屬性
# Color the points by region:
ggplot(WHO , aes(x= GNI , y= FertilityRate , color = Region)) + geom_point()

# Color the points according to life expectancy:
ggplot(WHO , aes(x= GNI , y= FertilityRate , color = LifeExpectancy)) + geom_point()

# Is the fertility rate of a country was a good predictor of the
# percentage of the population under 15?
ggplot(WHO , aes(x= FertilityRate , y=Under15 )) + geom_point()

7.1.4 數值尺度比例轉換
# Let's try a log transformation:
ggplot(WHO , aes(x= log(FertilityRate) , y=Under15 )) + geom_point()

7.1.5 回歸趨勢線
# Simple linear regression model to predict the percentage of the
# population under 15, using the log of the fertility rate:
mod = lm(Under15 ~ log(FertilityRate), data = WHO)
#顯示線性模型摘要
summary(mod)
Call:
lm(formula = Under15 ~ log(FertilityRate), data = WHO)
Residuals:
Min 1Q Median 3Q Max
-10.313 -1.774 0.045 1.744 7.717
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.654 0.448 17.1 <2e-16 ***
log(FertilityRate) 22.055 0.418 52.8 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.65 on 181 degrees of freedom
(11 observations deleted due to missingness)
Multiple R-squared: 0.939, Adjusted R-squared: 0.939
F-statistic: 2.79e+03 on 1 and 181 DF, p-value: <2e-16
# Add this regression line to our plot:
ggplot(WHO , aes(x= log(FertilityRate) , y=Under15 )) + geom_point() + stat_smooth(method = "lm")

7.1.6 趨勢線的信賴區間
# 99% confidence interval
ggplot(WHO , aes(x= log(FertilityRate) , y=Under15 )) + geom_point() + stat_smooth(method = "lm" , level = 0.99)

# No confidence interval in the plot
ggplot(WHO , aes(x= log(FertilityRate) , y=Under15 )) + geom_point() + stat_smooth(method = "lm" , se = FALSE )

# Change the color of the regression line:
ggplot(WHO , aes(x= log(FertilityRate) , y=Under15 )) + geom_point() + stat_smooth(method = "lm" , se = FALSE , color = "orange")

7.1.7 分群點狀圖
# quiz-1:
ggplot(WHO, aes(x = FertilityRate, y = Under15 , color = Region)) + geom_point()+scale_color_brewer(palette="Dark2")

7.1.8 分格點狀圖
# quiz-1:
ggplot(WHO, aes(x = log(Population), y = GNI, color=Region)) +
geom_point() +
stat_smooth(method='lm') +
facet_wrap(~Region) + theme_bw()

LS0tDQp0aXRsZTogIkFTNy0wQSBnZ3Bsb3QyIOe5quWcluWll+S7tiINCmF1dGhvcjogIuaWvemHh+W9oyBNMDY0MDIwMDE3LCAyMDE4LzA4LzAzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KPGJyPg0KDQpgYGB7cn0NClN5cy5zZXRsb2NhbGUoIkxDX0FMTCIsIkMiKQ0KcGFja2FnZXMgPSBjKA0KICAiZHBseXIiLCJnZ3Bsb3QyIiwiZDNoZWF0bWFwIiwiZ29vZ2xlVmlzIiwiZGV2dG9vbHMiLCJwbG90bHkiLCAieGdib29zdCIsDQogICJtYWdyaXR0ciIsImNhVG9vbHMiLCJST0NSIiwiY29ycnBsb3QiLCAicnBhcnQiLCAicnBhcnQucGxvdCIsDQogICJkb1BhcmFsbGVsIiwgImNhcmV0IiwgImdsbW5ldCIsICJNYXRyaXgiLCAiZTEwNzEiLCAicmFuZG9tRm9yZXN0IiwNCiAgImZsZXhjbHVzdCIsICJGYWN0b01pbmVSIiwgImZhY3RvZXh0cmEiLCAibWFwcyIsICJnZ21hcCIsICJpZ3JhcGgiLCAicmdsIiwNCiAgInRtIiwgIlNub3diYWxsQyIsICJ3b3JkY2xvdWQiLCAic2xhbSIsICJNYXRyaXgiLCAiUkNvbG9yQnJld2VyIg0KICApDQpleGlzdGluZyA9IGFzLmNoYXJhY3RlcihpbnN0YWxsZWQucGFja2FnZXMoKVssMV0pDQpmb3IocGtnIGluIHBhY2thZ2VzWyEocGFja2FnZXMgJWluJSBleGlzdGluZyldKSBpbnN0YWxsLnBhY2thZ2VzKHBrZykNCmBgYA0KDQpgYGB7ciBlY2hvPVQsIG1lc3NhZ2U9RiwgY2FjaGU9Riwgd2FybmluZz1GfQ0Kcm0obGlzdD1scyhhbGw9VCkpDQpvcHRpb25zKGRpZ2l0cz00LCBzY2lwZW49MTIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShtYXBzKQ0KbGlicmFyeShnZ21hcCkNCmBgYA0KDQotIC0gLQ0KDQojIyMgNy4xIGBnZ3Bsb3QyYCDnuarlnJblpZfku7YNCg0KIyMjIyMgNy4xLjEg5Z+65pys6bue54uA5ZyWDQpgYGB7cn0NCldITyA9IHJlYWQuY3N2KCJkYXRhL1dITy5jc3YiKQ0Kc3RyKFdITykNCmBgYA0KDQpgYGB7cn0NCiMgQmFzaWMgUGxvdCBpbiBSIA0KcGxvdChXSE8kR05JLCBXSE8kRmVydGlsaXR5UmF0ZSkNCmBgYA0KDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCiMgQ3JlYXRlIHRoZSBnZ3Bsb3Qgb2JqZWN0IHdpdGggdGhlIGRhdGEgYW5kIHRoZSBhZXN0aGV0aWMgbWFwcGluZzoNCiNnZ2xvdDLlj6/nlKjkvobnuaroo73lnJbniYcNCnNjYXR0ZXJwbG90ID0gZ2dwbG90KFdITyAsIGFlcyh4ID0gR05JICwgeSA9IEZlcnRpbGl0eVJhdGUpKQ0KYGBgDQoNCmBgYHtyfQ0KIyBBZGQgdGhlIGdlb21fcG9pbnQgZ2VvbWV0cnkNCnNjYXR0ZXJwbG90ICsgZ2VvbV9wb2ludCgpIA0KYGBgDQoNCmBgYHtyfQ0KIyBNYWtlIGEgbGluZSBncmFwaCBpbnN0ZWFkOg0Kc2NhdHRlcnBsb3QgKyBnZW9tX2xpbmUoKQ0KYGBgDQoNCmBgYHtyfQ0KIyBTd2l0Y2ggYmFjayB0byBvdXIgcG9pbnRzOg0Kc2NhdHRlcnBsb3QgKyBnZW9tX3BvaW50KCkNCmBgYA0KDQpgYGB7cn0NCiMgUmVkbyB0aGUgcGxvdCB3aXRoIGJsdWUgdHJpYW5nbGVzIGluc3RlYWQgb2YgY2lyY2xlczoNCnNjYXR0ZXJwbG90ICsgZ2VvbV9wb2ludChjb2xvciA9ICJibHVlIiwgc2l6ZSA9IDMsIHNoYXBlID0gMjEpDQoj5aKe5Yqg6aGP6Imy54K66JeN6Imy44CB5aSn5bCP6Kit5a6a44CBc2hhcGU9MTco5LiJ6KeS5b2iKQ0Kc2NhdHRlcnBsb3QgKyBnZW9tX3BvaW50KGNvbG9yID0gImJsdWUiICwgc2l6ZSA9IDMgLCBzaGFwZT0gMTcpDQpgYGANCg0KYGBge3J9DQojIEFub3RoZXIgb3B0aW9uOg0Kc2NhdHRlcnBsb3QgKyBnZW9tX3BvaW50KGNvbG9yID0gImRhcmtyZWQiLCBzaXplID0gMywgc2hhcGUgPSA4KQ0KYGBgDQoNCmBgYHtyfQ0KIyBBZGQgYSB0aXRsZSB0byB0aGUgcGxvdDoNCnNjYXR0ZXJwbG90ICsgDQogIGdlb21fcG9pbnQoY29sb3VyID0gImJsdWUiLCBzaXplID0gMywgc2hhcGUgPSAxNykgKyANCiAgZ2d0aXRsZSgiRmVydGlsaXR5IFJhdGUgdnMuIEdyb3NzIE5hdGlvbmFsIEluY29tZSIpDQpgYGANCg0KDQojIyMjIyA3LjEuMiDlhLLlrZjlnJbmqpQNCmBgYHtyfQ0KIyBTYXZlIG91ciBwbG90Og0KZmVydGlsaXR5R05JcGxvdCA9IHNjYXR0ZXJwbG90ICsgZ2VvbV9wb2ludChjb2xvdXIgPSAiYmx1ZSIsIHNpemUgPSAzLCBzaGFwZSA9IDE3KSArIGdndGl0bGUoIkZlcnRpbGl0eSBSYXRlIHZzLiBHcm9zcyBOYXRpb25hbCBJbmNvbWUiKQ0KI2ZlcnRpbGl0eUdOSXBsb3QgPSBzY2F0dGVycGxvdCArIGdlb21fcG9pbnQoY29sb3IgPSAiZGFya3JlZCIgLCBzaXplID0gMyAsIHNoYXBlPSA4KSArIGdndGl0bGUoIkZlcnRpbGl0eSBSYXRlIHZzLiBHcm9zcyBOYXRpb25hbCBJbmNvbWUiKQ0KcGRmKCJNeVBsb3QucGRmIikNCnByaW50KGZlcnRpbGl0eUdOSXBsb3QpDQpkZXYub2ZmKCkNCg0KYGBgDQoNCiMjIyMjIDcuMS4zIOWcluW9ouWFg+S7tuWxrOaApw0KYGBge3J9DQojIENvbG9yIHRoZSBwb2ludHMgYnkgcmVnaW9uOg0KZ2dwbG90KFdITyAsIGFlcyh4PSBHTkkgLCB5PSBGZXJ0aWxpdHlSYXRlICwgY29sb3IgPSBSZWdpb24pKSArIGdlb21fcG9pbnQoKQ0KYGBgDQoNCmBgYHtyfQ0KIyBDb2xvciB0aGUgcG9pbnRzIGFjY29yZGluZyB0byBsaWZlIGV4cGVjdGFuY3k6DQpnZ3Bsb3QoV0hPICwgYWVzKHg9IEdOSSAsIHk9IEZlcnRpbGl0eVJhdGUgLCBjb2xvciA9IExpZmVFeHBlY3RhbmN5KSkgKyBnZW9tX3BvaW50KCkNCmBgYA0KDQpgYGB7cn0NCiMgSXMgdGhlIGZlcnRpbGl0eSByYXRlIG9mIGEgY291bnRyeSB3YXMgYSBnb29kIHByZWRpY3RvciBvZiB0aGUgDQojIHBlcmNlbnRhZ2Ugb2YgdGhlIHBvcHVsYXRpb24gdW5kZXIgMTU/DQpnZ3Bsb3QoV0hPICwgYWVzKHg9IEZlcnRpbGl0eVJhdGUgLCB5PVVuZGVyMTUgKSkgKyBnZW9tX3BvaW50KCkNCmBgYA0KDQojIyMjIyA3LjEuNCDmlbjlgLzlsLrluqbmr5TkvovovYnmj5sNCmBgYHtyfQ0KIyBMZXQncyB0cnkgYSBsb2cgdHJhbnNmb3JtYXRpb246DQpnZ3Bsb3QoV0hPICwgYWVzKHg9IGxvZyhGZXJ0aWxpdHlSYXRlKSAsIHk9VW5kZXIxNSApKSArIGdlb21fcG9pbnQoKQ0KYGBgDQoNCg0KIyMjIyMgNy4xLjUg5Zue5q246Lao5Yui57eaDQpgYGB7cn0NCiMgU2ltcGxlIGxpbmVhciByZWdyZXNzaW9uIG1vZGVsIHRvIHByZWRpY3QgdGhlIHBlcmNlbnRhZ2Ugb2YgdGhlIA0KIyBwb3B1bGF0aW9uIHVuZGVyIDE1LCB1c2luZyB0aGUgbG9nIG9mIHRoZSBmZXJ0aWxpdHkgcmF0ZToNCm1vZCA9IGxtKFVuZGVyMTUgfiBsb2coRmVydGlsaXR5UmF0ZSksIGRhdGEgPSBXSE8pDQoj6aGv56S657ea5oCn5qih5Z6L5pGY6KaBDQpzdW1tYXJ5KG1vZCkNCmBgYA0KDQpgYGB7cn0NCiMgQWRkIHRoaXMgcmVncmVzc2lvbiBsaW5lIHRvIG91ciBwbG90Og0KZ2dwbG90KFdITyAsIGFlcyh4PSBsb2coRmVydGlsaXR5UmF0ZSkgLCB5PVVuZGVyMTUgKSkgKyBnZW9tX3BvaW50KCkgKyBzdGF0X3Ntb290aChtZXRob2QgPSAibG0iKQ0KYGBgDQoNCiMjIyMjIDcuMS42IOi2qOWLoue3mueahOS/oeiztOWNgOmWkw0KYGBge3J9DQojIDk5JSBjb25maWRlbmNlIGludGVydmFsDQpnZ3Bsb3QoV0hPICwgYWVzKHg9IGxvZyhGZXJ0aWxpdHlSYXRlKSAsIHk9VW5kZXIxNSApKSArIGdlb21fcG9pbnQoKSArIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIgLCBsZXZlbCA9IDAuOTkpDQpgYGANCg0KYGBge3J9DQojIE5vIGNvbmZpZGVuY2UgaW50ZXJ2YWwgaW4gdGhlIHBsb3QNCmdncGxvdChXSE8gLCBhZXMoeD0gbG9nKEZlcnRpbGl0eVJhdGUpICwgeT1VbmRlcjE1ICkpICsgZ2VvbV9wb2ludCgpICsgc3RhdF9zbW9vdGgobWV0aG9kID0gImxtIiAsIHNlID0gRkFMU0UgKQ0KYGBgDQoNCmBgYHtyfQ0KIyBDaGFuZ2UgdGhlIGNvbG9yIG9mIHRoZSByZWdyZXNzaW9uIGxpbmU6DQpnZ3Bsb3QoV0hPICwgYWVzKHg9IGxvZyhGZXJ0aWxpdHlSYXRlKSAsIHk9VW5kZXIxNSApKSArIGdlb21fcG9pbnQoKSArIHN0YXRfc21vb3RoKG1ldGhvZCA9ICJsbSIgLCBzZSA9IEZBTFNFICwgY29sb3IgPSAib3JhbmdlIikNCmBgYA0KDQojIyMjIyA3LjEuNyDliIbnvqTpu57ni4DlnJYNCmBgYHtyfQ0KIyBxdWl6LTE6DQpnZ3Bsb3QoV0hPLCBhZXMoeCA9IEZlcnRpbGl0eVJhdGUsIHkgPSBVbmRlcjE1ICwgY29sb3IgPSBSZWdpb24pKSArIGdlb21fcG9pbnQoKStzY2FsZV9jb2xvcl9icmV3ZXIocGFsZXR0ZT0iRGFyazIiKQ0KDQpgYGANCg0KIyMjIyMgNy4xLjgg5YiG5qC86bue54uA5ZyWDQpgYGB7cn0NCiMgcXVpei0xOg0KZ2dwbG90KFdITywgYWVzKHggPSBsb2coUG9wdWxhdGlvbiksIHkgPSBHTkksIGNvbG9yPVJlZ2lvbikpICsgDQogIGdlb21fcG9pbnQoKSArIA0KICBzdGF0X3Ntb290aChtZXRob2Q9J2xtJykgKw0KICBmYWNldF93cmFwKH5SZWdpb24pICsgdGhlbWVfYncoKQ0KYGBgDQoNCjxicj4NCg0KLSAtIC0NCg0KPGJyPjxicj48YnI+PGJyPjxicj4NCg0KPHN0eWxlPg0KLmNhcHRpb24gew0KICBjb2xvcjogIzc3NzsNCiAgbWFyZ2luLXRvcDogMTBweDsNCn0NCnAgY29kZSB7DQogIHdoaXRlLXNwYWNlOiBpbmhlcml0Ow0KfQ0KcHJlIHsNCiAgd29yZC1icmVhazogbm9ybWFsOw0KICB3b3JkLXdyYXA6IG5vcm1hbDsNCiAgbGluZS1oZWlnaHQ6IDE7DQp9DQpwcmUgY29kZSB7DQogIHdoaXRlLXNwYWNlOiBpbmhlcml0Ow0KfQ0KcCxsaSB7DQogIGZvbnQtZmFtaWx5OiAiVHJlYnVjaGV0IE1TIiwgIuW+rui7n+ato+m7kemrlCIsICJNaWNyb3NvZnQgSmhlbmdIZWkiOw0KfQ0KDQoucnsNCiAgbGluZS1oZWlnaHQ6IDEuMjsNCn0NCg0KdGl0bGV7DQogIGNvbG9yOiAjY2MwMDAwOw0KICBmb250LWZhbWlseTogIlRyZWJ1Y2hldCBNUyIsICLlvq7ou5/mraPpu5Hpq5QiLCAiTWljcm9zb2Z0IEpoZW5nSGVpIjsNCn0NCg0KYm9keXsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmgxLGgyLGgzLGg0LGg1ew0KICBjb2xvcjogIzAwODgwMDsNCiAgZm9udC1mYW1pbHk6ICJUcmVidWNoZXQgTVMiLCAi5b6u6Luf5q2j6buR6auUIiwgIk1pY3Jvc29mdCBKaGVuZ0hlaSI7DQp9DQoNCmgzew0KICBjb2xvcjogI2IzNmIwMDsNCiAgYmFja2dyb3VuZDogI2ZmZTBiMzsNCiAgbGluZS1oZWlnaHQ6IDI7DQogIGZvbnQtd2VpZ2h0OiBib2xkOw0KfQ0KDQpoNXsNCiAgY29sb3I6ICMwMDYwMDA7DQogIGJhY2tncm91bmQ6ICNmZmZmZTA7DQogIGxpbmUtaGVpZ2h0OiAyOw0KICBmb250LXdlaWdodDogYm9sZDsNCn0NCg0KZW17DQogIGNvbG9yOiAjMDAwMGMwOw0KICBiYWNrZ3JvdW5kOiAjZjBmMGYwOw0KICB9DQo8L3N0eWxlPg0KDQo=