# Switch every locale category to the portable "C" locale.
Sys.setlocale(category = "LC_ALL", locale = "C")
[1] "C"
# Packages used by this notebook. Each name is listed once ("Matrix" used to
# appear twice).
packages <- c(
  "dplyr", "ggplot2", "d3heatmap", "googleVis", "devtools", "plotly", "xgboost",
  "magrittr", "caTools", "ROCR", "corrplot", "rpart", "rpart.plot",
  "doParallel", "caret", "glmnet", "Matrix", "e1071", "randomForest",
  "flexclust", "FactoMineR", "factoextra", "maps", "ggmap", "igraph", "rgl",
  "tm", "SnowballC", "wordcloud", "slam", "RColorBrewer"
)
# Names of the packages that are already installed.
existing <- as.character(installed.packages()[, "Package"])
# Install everything that is missing in ONE call. Looping over the stale
# `existing` snapshot re-downloaded packages that had just been installed as a
# dependency of an earlier package (e.g. "slam", pulled in by "tm").
missing_pkgs <- setdiff(packages, existing)
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
Installing package into 'C:/Users/user/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/maps_3.3.0.zip'
Content type 'application/zip' length 3694254 bytes (3.5 MB)
downloaded 3.5 MB
package 'maps' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\user\AppData\Local\Temp\RtmpyYVyx7\downloaded_packages
Installing package into 'C:/Users/user/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
also installing the dependencies 'proto', 'RgoogleMaps', 'rjson', 'mapproj', 'jpeg', 'geosphere'
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/proto_1.0.0.zip'
Content type 'application/zip' length 471951 bytes (460 KB)
downloaded 460 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/RgoogleMaps_1.4.2.zip'
Content type 'application/zip' length 905989 bytes (884 KB)
downloaded 884 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/rjson_0.2.20.zip'
Content type 'application/zip' length 577826 bytes (564 KB)
downloaded 564 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/mapproj_1.2.6.zip'
Content type 'application/zip' length 90430 bytes (88 KB)
downloaded 88 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/jpeg_0.1-8.zip'
Content type 'application/zip' length 230437 bytes (225 KB)
downloaded 225 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/geosphere_1.5-7.zip'
Content type 'application/zip' length 977860 bytes (954 KB)
downloaded 954 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/ggmap_2.6.1.zip'
Content type 'application/zip' length 4673455 bytes (4.5 MB)
downloaded 4.5 MB
package 'proto' successfully unpacked and MD5 sums checked
package 'RgoogleMaps' successfully unpacked and MD5 sums checked
package 'rjson' successfully unpacked and MD5 sums checked
package 'mapproj' successfully unpacked and MD5 sums checked
package 'jpeg' successfully unpacked and MD5 sums checked
package 'geosphere' successfully unpacked and MD5 sums checked
package 'ggmap' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\user\AppData\Local\Temp\RtmpyYVyx7\downloaded_packages
Installing package into 'C:/Users/user/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/igraph_1.2.2.zip'
Content type 'application/zip' length 9147729 bytes (8.7 MB)
downloaded 8.7 MB
package 'igraph' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\user\AppData\Local\Temp\RtmpyYVyx7\downloaded_packages
Installing package into 'C:/Users/user/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
also installing the dependencies 'praise', 'testthat', 'processx', 'miniUI', 'webshot', 'manipulateWidget'
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/praise_1.0.0.zip'
Content type 'application/zip' length 19449 bytes (18 KB)
downloaded 18 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/testthat_2.0.0.zip'
Content type 'application/zip' length 1594841 bytes (1.5 MB)
downloaded 1.5 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/processx_3.1.0.zip'
Content type 'application/zip' length 1016588 bytes (992 KB)
downloaded 992 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/miniUI_0.1.1.1.zip'
Content type 'application/zip' length 36068 bytes (35 KB)
downloaded 35 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/webshot_0.5.0.zip'
Content type 'application/zip' length 1371372 bytes (1.3 MB)
downloaded 1.3 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/manipulateWidget_0.10.0.zip'
Content type 'application/zip' length 1858008 bytes (1.8 MB)
downloaded 1.8 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/rgl_0.99.16.zip'
Content type 'application/zip' length 4242708 bytes (4.0 MB)
downloaded 4.0 MB
package 'praise' successfully unpacked and MD5 sums checked
package 'testthat' successfully unpacked and MD5 sums checked
package 'processx' successfully unpacked and MD5 sums checked
package 'miniUI' successfully unpacked and MD5 sums checked
package 'webshot' successfully unpacked and MD5 sums checked
package 'manipulateWidget' successfully unpacked and MD5 sums checked
package 'rgl' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\user\AppData\Local\Temp\RtmpyYVyx7\downloaded_packages
Installing package into 'C:/Users/user/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
also installing the dependencies 'NLP', 'slam', 'xml2'
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/NLP_0.1-11.zip'
Content type 'application/zip' length 375672 bytes (366 KB)
downloaded 366 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/slam_0.1-43.zip'
Content type 'application/zip' length 208246 bytes (203 KB)
downloaded 203 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/xml2_1.2.0.zip'
Content type 'application/zip' length 3605815 bytes (3.4 MB)
downloaded 3.4 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/tm_0.7-5.zip'
Content type 'application/zip' length 1362697 bytes (1.3 MB)
downloaded 1.3 MB
package 'NLP' successfully unpacked and MD5 sums checked
package 'slam' successfully unpacked and MD5 sums checked
package 'xml2' successfully unpacked and MD5 sums checked
package 'tm' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\user\AppData\Local\Temp\RtmpyYVyx7\downloaded_packages
Installing package into 'C:/Users/user/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/SnowballC_0.5.1.zip'
Content type 'application/zip' length 3082565 bytes (2.9 MB)
downloaded 2.9 MB
package 'SnowballC' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\user\AppData\Local\Temp\RtmpyYVyx7\downloaded_packages
Installing package into 'C:/Users/user/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/wordcloud_2.5.zip'
Content type 'application/zip' length 582324 bytes (568 KB)
downloaded 568 KB
package 'wordcloud' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\user\AppData\Local\Temp\RtmpyYVyx7\downloaded_packages
Installing package into 'C:/Users/user/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/slam_0.1-43.zip'
Content type 'application/zip' length 208246 bytes (203 KB)
downloaded 203 KB
package 'slam' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\user\AppData\Local\Temp\RtmpyYVyx7\downloaded_packages
# Start from an empty workspace, including hidden (dot-prefixed) objects.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, and `all.names` is written in full instead of relying on
# partial argument matching.
rm(list = ls(all.names = TRUE))
# Print numbers with 4 significant digits and strongly prefer fixed notation
# over scientific notation.
options(digits = 4, scipen = 12)
library(dplyr)
library(ggplot2)
library(maps)
library(ggmap)
maps 與 ggmap 套件是畫地圖時使用的;ggplot2 則是繪圖套件。
WHO = read.csv("data/WHO.csv")
# Base-graphics plot: GNI on the x axis, fertility rate on the y axis.
plot(x = WHO$GNI, y = WHO$FertilityRate)
WHO$GNI、WHO$FertilityRate:WHO 是一個資料框,這裡取出其中兩個欄位,可以指定哪一個欄位當 X 軸、哪一個欄位當 Y 軸;如果遇到的是連續數值,畫出來的就會是散佈圖。
library(ggplot2)
# Base plot object: bind the WHO data frame and map GNI to the x axis and
# FertilityRate to the y axis. Geometry layers are added to it afterwards.
scatterplot <- ggplot(data = WHO, mapping = aes(x = GNI, y = FertilityRate))
ggplot() 的用途是把一個資料框做成繪圖物件;aes(x = GNI, y = FertilityRate) 則是指定 X 軸與 Y 軸各對應哪一個欄位。
# Draw the observations as points on top of the base plot.
scatterplot +
  geom_point()
geom_point() 是以點來繪圖的方式,畫出來的就是散佈圖(scatterplot)。
# Draw the same data as a line graph instead of points:
scatterplot + geom_line()
# Switch back to points:
scatterplot + geom_point()
# Redo the plot with blue triangles instead of circles.
# Shape 17 is the filled triangle; shape 21 (used previously) is a circle,
# which contradicted the intent stated here.
scatterplot + geom_point(color = "blue", size = 3, shape = 17)
# Another option: dark red asterisks (shape 8).
scatterplot + geom_point(color = "darkred", size = 3, shape = 8)
# Build the titled plot once and keep it, so it can be both shown and saved.
fertilityGNIplot <- scatterplot +
  geom_point(color = "blue", shape = 17, size = 3) +
  ggtitle("Fertility Rate vs. Gross National Income")
# Show the titled plot:
fertilityGNIplot
# Write the same plot to a PDF file; dev.off() closes the PDF device.
pdf(file = "MyPlot.pdf")
print(fertilityGNIplot)
dev.off()
null device
1
pdf("MyPlot.pdf") 是指把之後繪製的內容輸出到指定的 PDF 檔。如果要替圖加上名稱或標題,則是再加上 ggtitle("標題文字")。
# Map the point colour to the Region column:
ggplot(
  data = WHO,
  mapping = aes(x = GNI, y = FertilityRate, colour = Region)
) +
  geom_point()
# Map the point colour to the LifeExpectancy column instead:
ggplot(
  data = WHO,
  mapping = aes(x = GNI, y = FertilityRate, colour = LifeExpectancy)
) +
  geom_point()
LifeExpectancy 平均壽命
# Is a country's fertility rate a good predictor of the percentage of its
# population under 15?
ggplot(data = WHO, mapping = aes(x = FertilityRate, y = Under15)) +
  geom_point()
ggplot(WHO, aes(x = FertilityRate, y = Under15)):FertilityRate 是生育率,Under15 是 15 歲以下人口的比例;畫這張圖是為了看出兩者之間的線性關係。
# Same plot, but with the log-transformed fertility rate on the x axis:
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point()
# Fit a simple linear regression that predicts the percentage of the
# population under 15 from the log of the fertility rate, then print its
# summary:
mod <- lm(formula = Under15 ~ log(FertilityRate), data = WHO)
summary(mod)
Call:
lm(formula = Under15 ~ log(FertilityRate), data = WHO)
Residuals:
Min 1Q Median 3Q Max
-10.313 -1.774 0.045 1.744 7.717
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.654 0.448 17.1 <2e-16 ***
log(FertilityRate) 22.055 0.418 52.8 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.65 on 181 degrees of freedom
(11 observations deleted due to missingness)
Multiple R-squared: 0.939, Adjusted R-squared: 0.939
F-statistic: 2.79e+03 on 1 and 181 DF, p-value: <2e-16
lm() 是用來配適線性迴歸模型的函數。
# Overlay the fitted linear-model smoother on the scatterplot:
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(method = "lm")
# Same plot with the confidence level of the band set to 99%:
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(level = 0.99, method = "lm")
圖上畫出的是 99% 的信賴區間,但資料點未必落在這個 99% 的信賴區間內。這裡的信賴區間是指平均值的信賴區間,並非各個單點的信賴區間(不是個別估計);因此不能只用平均值來做決定,這樣可能做出很差的決定。
# Regression line only, with the confidence band switched off:
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(se = FALSE, method = "lm")
# Regression line drawn in orange:
ggplot(data = WHO, mapping = aes(x = log(FertilityRate), y = Under15)) +
  geom_point() +
  stat_smooth(color = "orange", method = "lm")
# quiz-1: fertility rate vs. share of population under 15, coloured by
# region with the "Accent" Brewer palette.
ggplot(
  data = WHO,
  mapping = aes(x = FertilityRate, y = Under15, colour = Region)
) +
  geom_point() +
  scale_color_brewer(palette = "Accent")
# quiz-1: log(Population) vs. GNI with a linear smoother, one panel per
# region, using the black-and-white theme.
# NOTE(review): this is the second block labelled "quiz-1" - confirm the
# intended numbering.
ggplot(
  data = WHO,
  mapping = aes(x = log(Population), y = GNI, colour = Region)
) +
  geom_point() +
  stat_smooth(method = "lm") +
  facet_wrap(~Region) +
  theme_bw()
facet_wrap(~Region) 這個動作就是依 Region 把圖畫成上面那樣一格一格的小圖;stat_smooth() 則是加上平滑線。