This is an R Markdown document.
D. 讀取臺北市所得收入者每人所得
# Load packages
library(tidyverse)
library(dplyr)
library(ggplot2)
library(reshape2)
library(scales)
library(lubridate)
#D1_ With geom_line
# Read the income-by-education table, reshape it to long format and plot one
# line per education level over the years.
# The file is read through an explicit path, so the working directory is
# never changed.
data_dir <- "C:/R project/3/2"
TW.Income <- read.csv(
  file.path(data_dir, "fi00127y2ac.csv"),
  fileEncoding = "UTF-8"
)

# Strip the "年" suffix and any spaces from the year labels, then add 1911
# (presumably converting ROC/Minguo years to Gregorian years - confirm
# against the data source).
year_digits <- gsub("年", "", TW.Income$Year, fixed = TRUE)
year_digits <- gsub(" ", "", year_digits, fixed = TRUE)
TW.Income$Year <- as.numeric(year_digits) + 1911

# Long format: one row per Year x measure column (columns: Year, variable,
# value).
mdata <- melt(TW.Income, id = c("Year"))

# Education levels that are excluded from the trend plot.
excluded_levels <- c("國小及以下", "高職", "專科", "國.初.中.初職.")

# Keep one row per Year x level and expose the level under the name used by
# the plot legend. mutate() is used because every input row is kept;
# summarise() is meant for one row per group.
mdata1 <- mdata %>%
  filter(!variable %in% excluded_levels) %>%
  mutate(Education = variable) %>%
  select(Year, variable, Education, value)

# Plot: yearly salary trend, one coloured line per education level; the
# y axis is labelled in units of 10,000 ("萬").
ggplot(mdata1, aes(x = Year, y = value, colour = Education)) +
  geom_line() +
  scale_y_continuous(labels = label_number(suffix = "萬", scale = 1e-4)) +
  labs(
    title = "台北教育程度別本業薪資所得走勢2009-2021 ",
    x = "年度",
    y = "平均每人年所得"
  )
#D_ With geom_linerange
# For each remaining education level, attach the mean, maximum and minimum
# salary across all years, then draw the min-max range per level.

# Education levels that are excluded from the range plot.
range_excluded_levels <- c("國小及以下", "高職", "專科", "國.初.中.初職.")

# Grouped mutate() keeps every Year row and repeats the per-level statistics
# on each of them; the result is ungrouped so later verbs are not affected.
mdata2 <- mdata %>%
  filter(!variable %in% range_excluded_levels) %>%
  group_by(variable) %>%
  mutate(
    Mean_salary = mean(value),
    Max_salary = max(value),
    min_salary = min(value)
  ) %>%
  ungroup() %>%
  select(variable, Mean_salary, Max_salary, min_salary, value, Year)

# One row per education level is enough for the range plot; plotting every
# Year row would draw the same segment repeatedly on top of itself.
salary_range <- distinct(mdata2, variable, Max_salary, min_salary)

# Plot: horizontal min-max salary range per education level; the x axis is
# labelled in units of 10,000 ("萬").
ggplot(salary_range, aes(y = variable)) +
  geom_linerange(
    aes(xmin = min_salary, xmax = Max_salary),
    colour = "purple",
    size = 2
  ) +
  scale_x_continuous(labels = label_number(suffix = "萬", scale = 1e-4)) +
  labs(
    title = "台北教育程度別本業薪資所得分布",
    x = "平均每人年所得",
    y = "教育程度"
  )
##E. Bonus question: redo question
#E1_ With geom_line
# Read the income-by-district table and plot one salary line per district
# over the years.
# The file is read through an explicit path, so the working directory is
# never changed.
district_data_dir <- "C:/R project/3/2"
TW1.Income <- read.csv(
  file.path(district_data_dir, "fi00122y12ac.csv"),
  fileEncoding = "UTF-8"
)

# Strip the "年" suffix and any spaces from the year labels, then add 1911
# (presumably converting ROC/Minguo years to Gregorian years - confirm
# against the data source).
district_year_digits <- gsub("年", "", TW1.Income$Year, fixed = TRUE)
district_year_digits <- gsub(" ", "", district_year_digits, fixed = TRUE)
TW1.Income$Year <- as.numeric(district_year_digits) + 1911

# Plot: yearly salary trend, one coloured line per district; the y axis is
# labelled in units of 10,000 ("萬").
ggplot(TW1.Income, aes(x = Year, y = Salary, colour = District)) +
  geom_line() +
  scale_y_continuous(labels = label_number(suffix = "萬", scale = 1e-4)) +
  labs(
    title = "台北行政區別本業薪資所得走勢2009-2021 ",
    x = "年度",
    y = "平均每人年所得"
  )
You can also embed plots, for example:
從下圖得知,中正區和大安區的薪資整體位於台北市行政區的前段,而萬華區則大幅落後其他地區。
#E2_ With geom_linerange
# For each district, attach the maximum and minimum salary across all years,
# then draw the min-max range per district.

# Grouped mutate() keeps every Year row and repeats the per-district
# statistics on each of them; the result is ungrouped so later verbs are not
# affected.
mydata <- TW1.Income %>%
  group_by(District) %>%
  mutate(
    Max_salary = max(Salary),
    min_salary = min(Salary)
  ) %>%
  ungroup() %>%
  select(District, Max_salary, min_salary, Salary, Year)

# One row per district is enough for the range plot; plotting every Year row
# would draw the same segment repeatedly on top of itself.
district_range <- distinct(mydata, District, Max_salary, min_salary)

# Plot: horizontal min-max salary range per district; the x axis is labelled
# in units of 10,000 ("萬").
ggplot(district_range, aes(y = District)) +
  geom_linerange(
    aes(xmin = min_salary, xmax = Max_salary),
    colour = "purple",
    size = 2
  ) +
  scale_x_continuous(labels = label_number(suffix = "萬", scale = 1e-4)) +
  labs(
    title = "台北行政區別本業薪資所得分布",
    x = "平均每人年所得",
    y = "行政區別"
  )
You can also embed plots, for example: 從下方圖片，發現文山區的本業薪資分布位於中間水平，極大值和極小值也沒有像中正區相差這麼大。