Exercises 1

The distribution of personal disposable income in Taiwan in 2015 has a story to tell. Revise the following plot to enhance that message.

dta<- read.table("C:/Nicole/Rstudio IDE code/R Data visualization/income_tw.csv", header = T,sep=",")
head(dta)
##               Income  Count
## 1  160,000 and under 807160
## 2 160,000 to 179,999 301650
## 3 180,000 to 199,999 313992
## 4 200,000 to 219,999 329290
## 5 220,000 to 239,999 369583
## 6 240,000 to 259,999 452671
str(dta)
## 'data.frame':    41 obs. of  2 variables:
##  $ Income: chr  "160,000 and under" "160,000 to 179,999" "180,000 to 199,999" "200,000 to 219,999" ...
##  $ Count : int  807160 301650 313992 329290 369583 452671 495387 517779 557786 584497 ...
summary(dta)
##     Income              Count       
##  Length:41          Min.   : 59820  
##  Class :character   1st Qu.:241508  
##  Mode  :character   Median :313992  
##                     Mean   :352494  
##                     3rd Qu.:461836  
##                     Max.   :807160
pacman::p_load(ggplot2, dplyr, stringr)

排序後的樣子,收入48000以下佔大多數人口,40~60/1000人

ggplot(dta, aes(x=Count/10000 , y=reorder(Income,Count))) +geom_point(size=rel(1)) +
  labs(x="Number of persons (x 10,000)", 
       y="",
       title = "Distribution of personal disposable income in Taiwan (2015)")+
  theme_minimal()

dta<-arrange(dta, Count)
head(dta)
##                   Income  Count
## 1     2,500,000 and over  59820
## 2 1,800,000 to 2,499,999 107955
## 3 1,250,000 to 1,399,999 188243
## 4 1,150,000 to 1,249,999 195032
## 5 1,070,000 to 1,149,999 195191
## 6 1,400,000 to 1,799,999 214910

將收入分組,不以quantile當切點,以觀察到圖分布當切點,分布看來,人口數20萬~40萬人之間 和 40萬以上有明顯pattern

dta1 <- dta %>%
    mutate(id = rep(1:n()))%>% 
    mutate(level=case_when(
    Count<200000 ~'Low',
    Count>=200000 & Count<400000 ~'Middle',
    TRUE ~'Hight'))
dta1 <- dta %>%
    mutate(id = rep(1:n()),
    cum = cumsum(Count)/sum(Count) *100,       
    level=cut(cum, 
          breaks = c(0,25,50,75,100),
          labels=c("Hight","Middle","Middle","Low"),include.lowest= T))

收入分組 低收的分布不到三萬元,中收入3~9萬元之間,高收大於9萬元。

ggplot(dta1, mapping=aes(x = Count/10000 , y=reorder(Income,-id))) +geom_boxplot() +
  labs(x=" Income Level in Taiwan (2015)", 
       y= "Number of persons (x 10,000)",
        )+
    facet_grid(~level)

   coord_flip()
## <ggproto object: Class CoordFlip, CoordCartesian, Coord, gg>
##     aspect: function
##     backtransform_range: function
##     clip: on
##     default: FALSE
##     distance: function
##     expand: TRUE
##     is_free: function
##     is_linear: function
##     labels: function
##     limits: list
##     modify_scales: function
##     range: function
##     render_axis_h: function
##     render_axis_v: function
##     render_bg: function
##     render_fg: function
##     setup_data: function
##     setup_layout: function
##     setup_panel_guides: function
##     setup_panel_params: function
##     setup_params: function
##     train_panel_guides: function
##     transform: function
##     super:  <ggproto object: Class CoordFlip, CoordCartesian, Coord, gg>

以人口數來看,高收約近17萬人,中收近20萬人,低收不到10萬人。

bar<-ggplot(data = dta1)+
    geom_bar(
    mapping=aes(x=level,fill=level),
    show.legend = FALSE, width=1)+
    #theme(aspect.ratio = 1)+
    labs(x=" Income Level in Taiwan (2015)", 
       y= "Number of persons (x 10,000)",)

bar+coord_flip()

Exercises 2

Comment on how the graphs presented in this link violate the principles for effective graphics and how would you revise them.

pacman::p_load(ggplot2, dplyr, viridis,ggthemes)

讀檔,用,將欄位隔開

E2dta <- read.table("traffic.txt",fileEncoding="big5",stringsAsFactors = F, header = T,sep=",")
head(E2dta)
##   year month                     type  law        vehicle_type method count
## 1  104     1 闖越平交道或在平交道違規 54條                汽車   逕舉     0
## 2  104     1 闖越平交道或在平交道違規 54條                汽車   攔停     0
## 3  104     1 闖越平交道或在平交道違規 54條 重型機車(250CC以上)   逕舉     0
## 4  104     1 闖越平交道或在平交道違規 54條 重型機車(250CC以上)   攔停     0
## 5  104     1 闖越平交道或在平交道違規 54條                機車   逕舉     0
## 6  104     1 闖越平交道或在平交道違規 54條                機車   攔停     0

作者原始圖檔

type <- E2dta %>%
  filter(year == 104) %>%
  group_by(type) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))

vtype <- E2dta %>%
  filter(year == 104) %>%
  group_by(vehicle_type) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))

method <- E2dta %>%
  filter(year == 104) %>%
  group_by(method) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))
summary(E2dta)
##       year           month            type               law           
##  Min.   :104.0   Min.   : 1.000   Length:9048        Length:9048       
##  1st Qu.:104.0   1st Qu.: 3.000   Class :character   Class :character  
##  Median :104.0   Median : 6.000   Mode  :character   Mode  :character  
##  Mean   :104.1   Mean   : 6.077                                        
##  3rd Qu.:104.0   3rd Qu.: 9.000                                        
##  Max.   :105.0   Max.   :12.000                                        
##  vehicle_type          method              count         
##  Length:9048        Length:9048        Min.   :    0.00  
##  Class :character   Class :character   1st Qu.:    0.00  
##  Mode  :character   Mode  :character   Median :    0.00  
##                                        Mean   :   97.63  
##                                        3rd Qu.:    0.00  
##                                        Max.   :14892.00
vtype
## # A tibble: 3 x 2
##   vehicle_type         count
##   <chr>                <int>
## 1 機車                428156
## 2 汽車                382063
## 3 重型機車(250CC以上)   4661

將取締資料依據”月份、舉發種類”、“月份、車種”與”月份、舉發方式”進行分類:

type_month <- E2dta %>%
  filter(year == 104) %>%
  group_by(month,type) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
vtype_month <- E2dta %>%
  filter(year == 104) %>%
  group_by(month,vehicle_type) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
method_month <- E2dta %>%
  filter(year == 104) %>%
  group_by(month,method) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.
ggplot(type[1:5,],aes(x=type,y =count,fill = type))+
  geom_bar(stat="identity",width=.5,alpha = 0.8) +
  geom_text(aes(label = count),size = 3.5) +
  ggtitle("104") +
  theme(text = element_text(family = "Songti SC"))
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

ggplot(vtype,aes(x=vehicle_type,y =count,fill = vehicle_type))+
  geom_bar(stat="identity",width=.5,alpha = 0.8) +
  geom_text(aes(label = count),size = 3.5) +
  ggtitle("104年交通違規舉發車種別件數") +
  theme(text = element_text(family = "Songti SC"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

ggplot(method,aes(x=method,y =count,fill = method))+
  geom_bar(stat="identity",width=.5,alpha = 0.8) +
  geom_text(aes(label = count),size = 3.5) +
  ggtitle("104年交通違規舉發方式別件數")  +
  theme(text = element_text(family = "Songti SC"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

top5type <- type[1:5,1]#取出數量前五的種類
top5 <- data.frame()#產生空的框架
for(i in 1:5){
  X <- type_month %>%
    filter(type == as.character(top5type[i,]))
  top5 <- rbind(top5,X)
}#重新取出數量前五種類的資料

ggplot(top5,aes(x=type,y =count,fill = type))+
  geom_boxplot(alpha = 0.7) +
  ggtitle("104年交通違規盒鬚圖") +
theme(text = element_text(family = "Songti SC"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

#畫出盒鬚圖

修正後

type_vtype<- E2dta %>%
  filter(year == 104) %>%
  group_by(type,vehicle_type) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count)) 
## `summarise()` has grouped output by 'type'. You can override using the `.groups` argument.
head(type_vtype)
## # A tibble: 6 x 3
## # Groups:   type [4]
##   type                   vehicle_type  count
##   <chr>                  <chr>         <int>
## 1 行車速度超速60公里以下 汽車         109896
## 2 違規停車               汽車         109338
## 3 違規停車               機車         106322
## 4 行車速度超速60公里以下 機車          77416
## 5 不依規定轉彎           機車          69390
## 6 違規停車拖吊           機車          59558

汽車和機車的違規的比較,機車在前五項違規模式中數量皆大於50000,汽車違規的情況較機車少。

top10<- type_vtype[1:10,]
ggplot(top10, aes(x=count,y=type,color=vehicle_type))+
      geom_point()+
    facet_grid(~vehicle_type)