dplyr & ggplot2 & R Markdown

This is an R Markdown document.

D. 讀取臺北市所得收入者每人所得

#Load Packages
library(tidyverse)
library(dplyr) 
library(ggplot2)
library(reshape2)
library(scales)
library(lubridate)

#D1_ With geom_line

# D1: read the Taipei income-by-education table, reshape it to long form and
# draw one line per education level.

# Build the full path instead of calling setwd(), so the working directory of
# the session is left untouched.
data_dir <- "C:/R project/3/2"
TW.Income <- read.csv(
  file.path(data_dir, "fi00127y2ac.csv"),
  fileEncoding = "UTF-8"
)

# Strip the "年" suffix and any spaces from Year, then add 1911 (presumably
# converting Minguo/ROC years to Gregorian years). The result is numeric.
TW.Income$Year <- gsub(" ", "", gsub("年", "", TW.Income$Year, fixed = TRUE),
                       fixed = TRUE)
TW.Income$Year <- as.numeric(TW.Income$Year) + 1911

# Long format: one row per (Year, variable) with the amount in `value`.
# `id.vars` is spelled out rather than relying on partial matching of `id`.
mdata <- melt(TW.Income, id.vars = "Year")

# Education levels left out of the plot. The last name is written the way
# read.csv() presents it after making column names syntactic.
excluded_levels <- c("國小及以下", "高職", "專科", "國.初.中.初職.")

# Keep the remaining levels and expose them under the name used for the legend.
mdata1 <- mdata %>%
  filter(!variable %in% excluded_levels) %>%
  mutate(Education = variable)

# Plot: y axis labels are shown in units of 10,000 ("萬").
ggplot(mdata1, aes(x = Year, y = value, colour = Education)) +
  geom_line() +
  scale_y_continuous(labels = label_number(suffix = "萬", scale = 1e-4)) +
  labs(
    title = "台北教育程度別本業薪資所得走勢2009-2021 ",
    x = "年度",
    y = "平均每人年所得"
  )

#D2_ With geom_linerange

# D2: summarise the salary range of each education level and draw it as a
# horizontal range with geom_linerange.

# Education levels left out of the plot.
excluded_levels <- c("國小及以下", "高職", "專科", "國.初.中.初職.")

# One row per education level. The original also carried `value` and `Year`
# through summarize(), which returns many rows per group (deprecated in
# dplyr 1.1.0) and made the plot draw the same segment once per year.
mdata2 <- mdata %>%
  filter(!variable %in% excluded_levels) %>%
  group_by(variable) %>%
  summarize(
    Mean_salary = mean(value),
    Max_salary = max(value),
    min_salary = min(value)
  )

# Plot: each level gets a single segment from its minimum to its maximum;
# x axis labels are shown in units of 10,000 ("萬").
# NOTE(review): `size` is kept for the segment thickness; recent ggplot2
# releases prefer `linewidth` for lines - switch once the minimum ggplot2
# version is confirmed.
ggplot(mdata2, aes(y = variable)) +
  geom_linerange(
    aes(xmin = min_salary, xmax = Max_salary),
    colour = "purple",
    size = 2
  ) +
  scale_x_continuous(labels = label_number(suffix = "萬", scale = 1e-4)) +
  labs(
    title = "台北教育程度別本業薪資所得分布",
    x = "平均每人年所得",
    y = "教育程度"
  )

##E. Bonus question: redo question

#E1_ With geom_line

# E1: read the Taipei income-by-district table and draw one line per district.

# Build the full path instead of calling setwd(), so the working directory of
# the session is left untouched.
data_dir <- "C:/R project/3/2"
TW1.Income <- read.csv(
  file.path(data_dir, "fi00122y12ac.csv"),
  fileEncoding = "UTF-8"
)

# Strip the "年" suffix and any spaces from Year, then add 1911 (presumably
# converting Minguo/ROC years to Gregorian years). The result is numeric.
TW1.Income$Year <- gsub(" ", "", gsub("年", "", TW1.Income$Year, fixed = TRUE),
                        fixed = TRUE)
TW1.Income$Year <- as.numeric(TW1.Income$Year) + 1911

# Plot: uses the Year, Salary and District columns of the table;
# y axis labels are shown in units of 10,000 ("萬").
ggplot(TW1.Income, aes(x = Year, y = Salary, colour = District)) +
  geom_line() +
  scale_y_continuous(labels = label_number(suffix = "萬", scale = 1e-4)) +
  labs(
    title = "台北行政區別本業薪資所得走勢2009-2021 ",
    x = "年度",
    y = "平均每人年所得"
  )

Including E_Plot With geom_line

You can also embed plots, for example: 從下圖得知,中正區和大安區的薪資整體位於台北市行政區的前段,而萬華區則大幅落後其他地區。

#E2_ With geom_linerange

# E2: summarise the salary range of each district and draw it as a horizontal
# range with geom_linerange.

# One row per district. The original also carried `Salary` and `Year` through
# summarize(), which returns many rows per group (deprecated in dplyr 1.1.0)
# and made the plot draw the same segment once per year.
mydata <- TW1.Income %>%
  group_by(District) %>%
  summarize(
    Max_salary = max(Salary),
    min_salary = min(Salary)
  )

# Plot: each district gets a single segment from its minimum to its maximum;
# x axis labels are shown in units of 10,000 ("萬").
# NOTE(review): `size` is kept for the segment thickness; recent ggplot2
# releases prefer `linewidth` for lines - switch once the minimum ggplot2
# version is confirmed.
ggplot(mydata, aes(y = District)) +
  geom_linerange(
    aes(xmin = min_salary, xmax = Max_salary),
    colour = "purple",
    size = 2
  ) +
  scale_x_continuous(labels = label_number(suffix = "萬", scale = 1e-4)) +
  labs(
    title = "台北行政區別本業薪資所得分布",
    x = "平均每人年所得",
    y = "行政區別"
  )

Including E_Plot With geom_linerange

You can also embed plots, for example: 從下方圖片,發現文山區的本業薪資分布是位於中間水平,極大值和極小值也沒有像中正區相差這麼大。