The distribution of personal disposable income in Taiwan in 2015 has a story to tell. Revise the following plot to enhance that message.
<- read.table("C:/Nicole/Rstudio IDE code/R Data visualization/income_tw.csv", header = T,sep=",") dta
head(dta)
## Income Count
## 1 160,000 and under 807160
## 2 160,000 to 179,999 301650
## 3 180,000 to 199,999 313992
## 4 200,000 to 219,999 329290
## 5 220,000 to 239,999 369583
## 6 240,000 to 259,999 452671
str(dta)
## 'data.frame': 41 obs. of 2 variables:
## $ Income: chr "160,000 and under" "160,000 to 179,999" "180,000 to 199,999" "200,000 to 219,999" ...
## $ Count : int 807160 301650 313992 329290 369583 452671 495387 517779 557786 584497 ...
summary(dta)
## Income Count
## Length:41 Min. : 59820
## Class :character 1st Qu.:241508
## Mode :character Median :313992
## Mean :352494
## 3rd Qu.:461836
## Max. :807160
::p_load(ggplot2, dplyr, stringr) pacman
排序後的樣子,收入48000以下佔大多數人口,40~60/1000人
ggplot(dta, aes(x=Count/10000 , y=reorder(Income,Count))) +geom_point(size=rel(1)) +
labs(x="Number of persons (x 10,000)",
y="",
title = "Distribution of personal disposable income in Taiwan (2015)")+
theme_minimal()
<-arrange(dta, Count) dta
head(dta)
## Income Count
## 1 2,500,000 and over 59820
## 2 1,800,000 to 2,499,999 107955
## 3 1,250,000 to 1,399,999 188243
## 4 1,150,000 to 1,249,999 195032
## 5 1,070,000 to 1,149,999 195191
## 6 1,400,000 to 1,799,999 214910
將收入分組,不以quantile當切點,以觀察到圖分布當切點,分布看來,人口數20萬~40萬人之間 和 40萬以上有明顯pattern
<- dta %>%
dta1 mutate(id = rep(1:n()))%>%
mutate(level=case_when(
<200000 ~'Low',
Count>=200000 & Count<400000 ~'Middle',
CountTRUE ~'Hight'))
<- dta %>%
dta1 mutate(id = rep(1:n()),
cum = cumsum(Count)/sum(Count) *100,
level=cut(cum,
breaks = c(0,25,50,75,100),
labels=c("Hight","Middle","Middle","Low"),include.lowest= T))
收入分組 低收的分布不到三萬元,中收入3~9萬元之間,高收大於9萬元。
ggplot(dta1, mapping=aes(x = Count/10000 , y=reorder(Income,-id))) +geom_boxplot() +
labs(x=" Income Level in Taiwan (2015)",
y= "Number of persons (x 10,000)",
+
)facet_grid(~level)
coord_flip()
## <ggproto object: Class CoordFlip, CoordCartesian, Coord, gg>
## aspect: function
## backtransform_range: function
## clip: on
## default: FALSE
## distance: function
## expand: TRUE
## is_free: function
## is_linear: function
## labels: function
## limits: list
## modify_scales: function
## range: function
## render_axis_h: function
## render_axis_v: function
## render_bg: function
## render_fg: function
## setup_data: function
## setup_layout: function
## setup_panel_guides: function
## setup_panel_params: function
## setup_params: function
## train_panel_guides: function
## transform: function
## super: <ggproto object: Class CoordFlip, CoordCartesian, Coord, gg>
以人口數來看,高收約近17萬人,中收近20萬人,低收不到10萬人。
<-ggplot(data = dta1)+
bargeom_bar(
mapping=aes(x=level,fill=level),
show.legend = FALSE, width=1)+
#theme(aspect.ratio = 1)+
labs(x=" Income Level in Taiwan (2015)",
y= "Number of persons (x 10,000)",)
+coord_flip() bar
Comment on how the graphs presented in this link violate the principles for effective graphics and how would you revise them.
::p_load(ggplot2, dplyr, viridis,ggthemes) pacman
讀檔,用,將欄位隔開
<- read.table("traffic.txt",fileEncoding="big5",stringsAsFactors = F, header = T,sep=",") E2dta
head(E2dta)
## year month type law vehicle_type method count
## 1 104 1 闖越平交道或在平交道違規 54條 汽車 逕舉 0
## 2 104 1 闖越平交道或在平交道違規 54條 汽車 攔停 0
## 3 104 1 闖越平交道或在平交道違規 54條 重型機車(250CC以上) 逕舉 0
## 4 104 1 闖越平交道或在平交道違規 54條 重型機車(250CC以上) 攔停 0
## 5 104 1 闖越平交道或在平交道違規 54條 機車 逕舉 0
## 6 104 1 闖越平交道或在平交道違規 54條 機車 攔停 0
<- E2dta %>%
type filter(year == 104) %>%
group_by(type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
<- E2dta %>%
vtype filter(year == 104) %>%
group_by(vehicle_type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
<- E2dta %>%
method filter(year == 104) %>%
group_by(method) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
summary(E2dta)
## year month type law
## Min. :104.0 Min. : 1.000 Length:9048 Length:9048
## 1st Qu.:104.0 1st Qu.: 3.000 Class :character Class :character
## Median :104.0 Median : 6.000 Mode :character Mode :character
## Mean :104.1 Mean : 6.077
## 3rd Qu.:104.0 3rd Qu.: 9.000
## Max. :105.0 Max. :12.000
## vehicle_type method count
## Length:9048 Length:9048 Min. : 0.00
## Class :character Class :character 1st Qu.: 0.00
## Mode :character Mode :character Median : 0.00
## Mean : 97.63
## 3rd Qu.: 0.00
## Max. :14892.00
vtype
## # A tibble: 3 x 2
## vehicle_type count
## <chr> <int>
## 1 機車 428156
## 2 汽車 382063
## 3 重型機車(250CC以上) 4661
將取締資料依據”月份、舉發種類”、“月份、車種”與”月份、舉發方式”進行分類:
<- E2dta %>%
type_month filter(year == 104) %>%
group_by(month,type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
<- E2dta %>%
vtype_month filter(year == 104) %>%
group_by(month,vehicle_type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
<- E2dta %>%
method_month filter(year == 104) %>%
group_by(month,method) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
ggplot(type[1:5,],aes(x=type,y =count,fill = type))+
geom_bar(stat="identity",width=.5,alpha = 0.8) +
geom_text(aes(label = count),size = 3.5) +
ggtitle("104") +
theme(text = element_text(family = "Songti SC"))
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
ggplot(vtype,aes(x=vehicle_type,y =count,fill = vehicle_type))+
geom_bar(stat="identity",width=.5,alpha = 0.8) +
geom_text(aes(label = count),size = 3.5) +
ggtitle("104年交通違規舉發車種別件數") +
theme(text = element_text(family = "Songti SC"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
ggplot(method,aes(x=method,y =count,fill = method))+
geom_bar(stat="identity",width=.5,alpha = 0.8) +
geom_text(aes(label = count),size = 3.5) +
ggtitle("104年交通違規舉發方式別件數") +
theme(text = element_text(family = "Songti SC"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
<- type[1:5,1]#取出數量前五的種類
top5type <- data.frame()#產生空的框架
top5 for(i in 1:5){
<- type_month %>%
X filter(type == as.character(top5type[i,]))
<- rbind(top5,X)
top5 #重新取出數量前五種類的資料
}
ggplot(top5,aes(x=type,y =count,fill = type))+
geom_boxplot(alpha = 0.7) +
ggtitle("104年交通違規盒鬚圖") +
theme(text = element_text(family = "Songti SC"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
#畫出盒鬚圖
<- E2dta %>%
type_vtypefilter(year == 104) %>%
group_by(type,vehicle_type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
## `summarise()` has grouped output by 'type'. You can override using the `.groups` argument.
head(type_vtype)
## # A tibble: 6 x 3
## # Groups: type [4]
## type vehicle_type count
## <chr> <chr> <int>
## 1 行車速度超速60公里以下 汽車 109896
## 2 違規停車 汽車 109338
## 3 違規停車 機車 106322
## 4 行車速度超速60公里以下 機車 77416
## 5 不依規定轉彎 機車 69390
## 6 違規停車拖吊 機車 59558
汽車和機車的違規的比較,機車在前五項違規模式中數量皆大於50000,汽車違規的情況較機車少。
<- type_vtype[1:10,]
top10ggplot(top10, aes(x=count,y=type,color=vehicle_type))+
geom_point()+
facet_grid(~vehicle_type)