library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Point the session at the folder that holds the course data.
# NOTE(review): this absolute path is machine-specific; the script will fail
# on any computer where the folder does not exist.
setwd("C:/Users/User/Google 雲端硬碟/政治大學ECO/R")
# Load the 2012-2017 table into `dead`, keeping text columns as character
# vectors. `FALSE` is spelled out because `F` is an ordinary variable that can
# be reassigned, whereas `FALSE` is a reserved word.
dead <- read.csv("2012-2017dead.csv", stringsAsFactors = FALSE)
# First rows of `dead` after sorting by `pop` in ascending order
head(arrange(dead, pop))
## year county sex age cause pop num
## 1 2014 Lienchiang County 2 21 1 0 0
## 2 2014 Lienchiang County 2 21 2 0 0
## 3 2014 Lienchiang County 2 21 3 0 0
## 4 2014 Lienchiang County 2 21 4 0 0
## 5 2014 Lienchiang County 2 21 5 0 0
## 6 2014 Lienchiang County 2 21 6 0 0
# First rows of `dead` after sorting by `pop` in descending order
head(arrange(dead, desc(pop)))
## year county sex age cause pop num
## 1 2012 New Taipei City 2 7 1 184883 0
## 2 2012 New Taipei City 2 7 2 184883 1
## 3 2012 New Taipei City 2 7 3 184883 0
## 4 2012 New Taipei City 2 7 4 184883 0
## 5 2012 New Taipei City 2 7 5 184883 1
## 6 2012 New Taipei City 2 7 6 184883 1
# Keep only the rows whose `year` equals 2014 and show the first few
head(filter(dead, year == 2014))
## year county sex age cause pop num
## 1 2014 Changhua County 1 1 1 28476 0
## 2 2014 Changhua County 1 1 2 28476 0
## 3 2014 Changhua County 1 1 3 28476 2
## 4 2014 Changhua County 1 1 4 28476 0
## 5 2014 Changhua County 1 1 5 28476 0
## 6 2014 Changhua County 1 1 6 28476 1
# Keep only the `year`, `sex` and `age` columns
head(select(dead, year, sex, age))
## year sex age
## 1 2012 1 1
## 2 2012 1 1
## 3 2012 1 1
## 4 2012 1 1
## 5 2012 1 1
## 6 2012 1 1
# A minus sign marks the variables to drop: everything except
# `year`, `sex` and `age` is kept
head(select(dead, -c(year, sex, age)))
## county cause pop num
## 1 Changhua County 1 29188 0
## 2 Changhua County 2 29188 0
## 3 Changhua County 3 29188 0
## 4 Changhua County 4 29188 0
## 5 Changhua County 5 29188 0
## 6 Changhua County 6 29188 0
# Add a `rate` column computed as num / pop.
# NOTE(review): the data contains rows with pop == 0 and num == 0, for which
# this ratio is NaN.
head(mutate(dead, rate = num / pop))
## year county sex age cause pop num rate
## 1 2012 Changhua County 1 1 1 29188 0 0
## 2 2012 Changhua County 1 1 2 29188 0 0
## 3 2012 Changhua County 1 1 3 29188 0 0
## 4 2012 Changhua County 1 1 4 29188 0 0
## 5 2012 Changhua County 1 1 5 29188 0 0
## 6 2012 Changhua County 1 1 6 29188 0 0
# Same column selection as above, written as a pipeline
dead %>%
  select(year, sex, age) %>%
  head()
## year sex age
## 1 2012 1 1
## 2 2012 1 1
## 3 2012 1 1
## 4 2012 1 1
## 5 2012 1 1
## 6 2012 1 1
# Total of `pop` for each value of `sex`.
# `pop` is converted to double before summing; the totals exceed the largest
# value an R integer can hold (presumably `pop` is read as integer).
dead %>%
  group_by(sex) %>%
  summarise(sum = sum(as.numeric(pop)))
## # A tibble: 2 x 2
## sex sum
## <int> <dbl>
## 1 1 9477926235
## 2 2 9520111980
# Keep the rows whose `cause` code is one of 1 through 10
dead %>%
  filter(cause %in% 1:10) %>%
  head()
## year county sex age cause pop num
## 1 2012 Changhua County 1 1 1 29188 0
## 2 2012 Changhua County 1 1 2 29188 0
## 3 2012 Changhua County 1 1 3 29188 0
## 4 2012 Changhua County 1 1 4 29188 0
## 5 2012 Changhua County 1 1 5 29188 0
## 6 2012 Changhua County 1 1 6 29188 0
# Base-graphics scatter plot of six hand-typed (x, y) points
plot(c(1, 3, 5, 3, 6, 2), c(7, 4, 6, 3, 9, 5))
library(ggplot2)
# Print the compact structure of the `diamonds` data set:
# its classes, dimensions, and the type and first values of each column
str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame': 53940 obs. of 10 variables:
## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Per-column summary of `diamonds`: quartiles and mean for numeric columns,
# level counts for the factor columns
summary(diamonds)
## carat cut color clarity
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066
## Max. :5.0100 I: 5422 VVS1 : 3655
## J: 2808 (Other): 2531
## depth table price x
## Min. :43.00 Min. :43.00 Min. : 326 Min. : 0.000
## 1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710
## Median :61.80 Median :57.00 Median : 2401 Median : 5.700
## Mean :61.75 Mean :57.46 Mean : 3933 Mean : 5.731
## 3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540
## Max. :79.00 Max. :95.00 Max. :18823 Max. :10.740
##
## y z
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.720 1st Qu.: 2.910
## Median : 5.710 Median : 3.530
## Mean : 5.735 Mean : 3.539
## 3rd Qu.: 6.540 3rd Qu.: 4.040
## Max. :58.900 Max. :31.800
##
# Set up the plot with carat on x and price on y; no geom layer is added here
ggplot(diamonds, aes(carat, price))
# Assign `diamonds` to a variable of the same name in the workspace
diamonds <- diamonds

# Number of diamonds of each colour: one row per `color`, count stored in `n`
df1 <- diamonds %>%
  group_by(color) %>%
  summarise(n = n())
# Basic bar plot: one bar per colour, bar height taken directly from `n`
ggplot(df1, aes(color, n)) +
  geom_col()
# Horizontal bar plot: same bars, with the x and y axes swapped
ggplot(df1, aes(color, n)) +
  geom_col() +
  coord_flip()
# Narrower bars (width 0.5) drawn with a blue outline and a white fill
ggplot(df1, aes(color, n)) +
  geom_col(width = 0.5, colour = "blue", fill = "white")
# Value labels outside or inside the bars:
# a negative vjust lifts each label above the top of its bar
ggplot(df1, aes(color, n)) +
  geom_col(colour = "blue", fill = "white") +
  geom_text(aes(label = n), vjust = -0.3, size = 3.5)
# Colour the bars by group: both outline and fill are mapped to `color`.
# The mapping is set on the plot, so the text labels inherit the outline colour.
ggplot(df1, aes(color, n, colour = color, fill = color)) +
  geom_col() +
  geom_text(aes(label = n), vjust = -0.3, size = 3.5)
# Use a hand-picked fill palette, one hex code per colour grade
# https://coolors.co/
# http://nipponcolors.com/
# http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3
ggplot(df1, aes(color, n, fill = color)) +
  geom_col() +
  geom_text(aes(label = n), vjust = -0.3, size = 3.5) +
  scale_fill_manual(
    values = c(
      "#999999", "#E69F00", "#56B4E9", "#9E7A7A",
      "#74673E", "#2EA9DF", "#707C74"
    )
  )
# Move the legend: legend.position accepts e.g. "top", "bottom" or "none"
ggplot(df1, aes(color, n, fill = color)) +
  geom_col() +
  geom_text(aes(label = n), vjust = -0.3, size = 3.5) +
  scale_fill_manual(
    values = c(
      "#999999", "#E69F00", "#56B4E9", "#9E7A7A",
      "#74673E", "#2EA9DF", "#707C74"
    )
  ) +
  theme(legend.position = "top")
# Change the order of the items: the limits of the discrete x scale list the
# colour grades from J down to D, so the bars are drawn in that order
ggplot(df1, aes(color, n, fill = color)) +
  geom_col() +
  geom_text(aes(label = n), vjust = -0.3, size = 3.5) +
  scale_fill_manual(
    values = c(
      "#999999", "#E69F00", "#56B4E9", "#9E7A7A",
      "#74673E", "#2EA9DF", "#707C74"
    )
  ) +
  scale_x_discrete(limits = c("J", "I", "H", "G", "F", "E", "D"))
# Title, axis names and font sizes
ggplot(df1, aes(color, n, fill = color)) +
  geom_col() +
  geom_text(aes(label = n), vjust = -0.3, size = 3.5) +
  scale_fill_manual(
    values = c(
      "#999999", "#E69F00", "#56B4E9", "#9E7A7A",
      "#74673E", "#2EA9DF", "#707C74"
    )
  ) +
  # add the title and axis names (x: "diamond colour", y: "count")
  labs(title = "Bar chart", x = "鑽石顏色", y = "數量") +
  # adjust the fonts: large centred title, bigger axis text and axis titles
  theme(
    plot.title = element_text(size = 30, hjust = 0.5),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.title.x = element_text(size = 25),
    axis.title.y = element_text(size = 25)
  )
# Scatter plot of price against carat, coloured by cut, with a smoothed trend
ggplot(diamonds, aes(carat, price)) + # create the canvas
  geom_point(aes(colour = cut)) + # choose the geometry: points
  geom_smooth(colour = "#2ca25f") + # draw the smoothed line
  # set the point colours and the legend title
  scale_color_manual(
    values = c("#7fc97f", "#beaed4", "#fdc086", "#ffff99", "#386cb0"),
    guide = guide_legend(title = "Cut")
  ) +
  # add the title and axis names
  # (title: "scatter plot", x: "diamond weight", y: "price")
  labs(title = "散佈圖", x = "鑽石重量", y = "價格") +
  # limit the visible x range to 0-3
  coord_cartesian(xlim = c(0, 3)) +
  # adjust the fonts and put the legend at the top
  theme(
    plot.title = element_text(size = 30, hjust = 0.5),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.title.x = element_text(size = 25),
    axis.title.y = element_text(size = 25),
    legend.position = "top"
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Total of `num` for each year: one row per `year`, total stored in `sum`
df2 <- dead %>%
  group_by(year) %>%
  summarise(sum = sum(num))
# Line chart of the yearly totals, with a point marking each year
# (title: "deaths by year", x: "year", y: "number of people")
ggplot(df2, aes(year, sum)) +
  geom_line() +
  geom_point() +
  labs(title = "歷年死亡人口", x = "年度", y = "人數")