Data

#
# Load the survey extract. Text columns are read as factors explicitly because
# the relevel() calls below only accept factors, and read.csv() stopped
# converting strings to factors by default in R 4.0.
dta <- read.csv("data0419.csv", header = TRUE, stringsAsFactors = TRUE)
options(digits = 3)
pacman::p_load(tidyverse, ggplot2)

# Put each categorical variable's reference category first, coerce the numeric
# measures, derive the education-zone minus job-zone gap, and keep respondents
# aged 20 or older.
dta <- dta %>%
  mutate(
    Gender = relevel(Gender, ref = "女"),
    Sector = relevel(Sector, ref = "私立"),
    Field1 = relevel(Field1, ref = "遊憩與運動學群"),
    EduLv = factor(EduLv, levels = c("博士", "碩士", "普通大學", "科技大學",
                                     "技術學院", "五專", "三專",
                                     "二專", "高中", "高職", "國中")),
    EduLv = relevel(EduLv, ref = "技術學院"),
    Region = factor(Region, levels = c("宜花東離島", "北北基", "桃竹苗",
                                       "中彰投", "雲嘉南", "高屏澎")),
    Age = as.numeric(Age),
    J_year = as.numeric(J_year),
    JobZone = as.numeric(JobZone),
    EduZone = as.numeric(EduZone),
    JobZone_D = as.numeric(EduZone - JobZone),
    Salary = as.numeric(Salary),
    Core = recode_factor(as.factor(JobCor), "1" = "無關聯",
                         "2" = "部分關聯",
                         "3" = "核心關聯"),
    # Listing the levels explicitly already puts "符合工作要求" first, so no
    # separate relevel() call is needed for this column.
    SubEduOver = factor(SubEduOver,
                        levels = c("符合工作要求", "高於工作要求", "低於工作要求"))
  ) %>%
  filter(Age >= 20)

# Inspect the column types after recoding.
glimpse(dta)

## Observations: 1,568
## Variables: 25
## $ SID         <fctr> A10, A100, A103, A104, A105, A106, A107, A108, A1...
## $ Gender      <fctr> 女, 女, 男, 女, 女, 女, 女, 女, 女, 男, 男, 男, 女, 男, 男, 女, 女...
## $ Sector      <fctr> 國立（公立）, 私立, 國立（公立）, 國立（公立）, 國立（公立）, 私立, 私立, 國立（公立...
## $ EduLv       <fctr> 碩士, 普通大學, 高職, 普通大學, 普通大學, 普通大學, 普通大學, 普通大學, 碩士, 碩...
## $ SubEduOver  <fctr> 高於工作要求, 符合工作要求, 符合工作要求, 符合工作要求, 符合工作要求, 符合工作要求, 符...
## $ Require     <fctr> 高中/高職, 普通大學, 高中/高職, 普通大學, 普通大學, 普通大學, 普通大學, 普通大學,...
## $ Field1      <fctr> 資訊學群, 外語學群, 工程學群, 文史哲學群, 文史哲學群, 大眾傳播學群, 大眾傳播學群, 藝...
## $ City        <fctr> 高雄市, 苗栗縣, 高雄市, 南投縣, 嘉義市, 臺北市, 臺北市, 南投縣, 高雄市, 臺中市,...
## $ Category    <fctr> 受雇於公營機關, 受雇於公營機關, 受雇者於私營企業, 受雇於公營機關, 受雇者於私營企業, 受雇...
## $ Staff       <fctr> 50-99人, 50-99人, 2-9人, 100-199人, 10-29人, 30-49人, 3...
## $ Hours       <int> 40, 70, 57, 51, 64, 50, 50, 47, 50, 60, 45, 40, 56...
## $ J_year      <dbl> 8, 4, 21, 1, 6, 0, 1, 1, 17, 7, 3, 23, 1, 2, 1, 1,...
## $ J_total     <dbl> 8, 4, 30, 1, 6, 0, 2, 2, 28, 7, 30, 26, 1, 10, 1, ...
## $ income      <fctr> 2-3萬以下, 3-4萬以下, 3-4萬以下, 4-5萬以下, 2萬以下, 3-4萬以下, 3-4...
## $ SubMismatch <int> 2, 3, 5, 4, 5, 4, 3, 3, 4, 4, 4, 4, 5, 5, 3, 2, 4,...
## $ JobSat      <int> 4, 3, 5, 6, 7, 5, 3, 6, 3, 5, 4, 7, 3, 4, 4, 4, 5,...
## $ EduZone     <dbl> 5, 4, 2, 4, 4, 4, 4, 4, 5, 5, 3, 5, 4, 4, 4, 4, 5,...
## $ Region      <fctr> 高屏澎, 桃竹苗, 高屏澎, 中彰投, 雲嘉南, 北北基, 北北基, 中彰投, 高屏澎, 中彰投,...
## $ Salary      <dbl> 25000, 35000, 35000, 45000, 20000, 35000, 35000, 4...
## $ Age         <dbl> 34, 30, 62, 25, 21, 24, 25, 26, 57, 35, 54, 54, 23...
## $ JobZone     <dbl> 3, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 3, 2, 4,...
## $ JobCor      <int> 1, 2, 1, 2, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 3, 1, 2,...
## $ Core        <fctr> 無關聯, 部分關聯, 無關聯, 部分關聯, 無關聯, 核心關聯, 無關聯, 無關聯, 無關聯, 核...
## $ ObjOver     <fctr> over, adequate, under, adequate, adequate, adequa...
## $ JobZone_D   <dbl> 2, 0, -1, 0, 0, 0, 0, 0, 1, 1, -1, 0, 0, 0, 1, 2, ...

# Number of missing values in each column. is.na() on a data frame already
# yields a logical matrix, so colSums() gives the per-column totals without
# the nested apply() calls, which first coerce the whole data frame to a
# character matrix.
colSums(is.na(dta))

##         SID      Gender      Sector       EduLv  SubEduOver     Require 
##           0           0           0           0           0           0 
##      Field1        City    Category       Staff       Hours      J_year 
##           0           0           0           0           0           0 
##     J_total      income SubMismatch      JobSat     EduZone      Region 
##           0           0           0           0           0           0 
##      Salary         Age     JobZone      JobCor        Core     ObjOver 
##           0           0           0           0           0           0 
##   JobZone_D 
##           0

對照組設定

對照組：女、私立、遊憩與運動學群、技術學院、宜花東離島、自評過量（符合工作要求）、客評關聯（無關聯）

# List the level order of the categorical variables; the first level of each
# is the one treated as the reference category.
dta %>%
  dplyr::select(Sector, Field1, City, Region, EduLv, SubEduOver, ObjOver) %>%
  lapply(levels)

## $Sector
## [1] "私立"         "國外學校"     "國立（公立）"
## 
## $Field1
##  [1] "遊憩與運動學群" "大眾傳播學群"   "工程學群"       "文史哲學群"    
##  [5] "外語學群"       "生命科學學群"   "生物資源學群"   "地球與環境學群"
##  [9] "法政學群"       "社會與心理學群" "建築與設計學群" "財經學群"      
## [13] "教育學群"       "資訊學群"       "管理學群"       "數理化學群"    
## [17] "醫藥衛生學群"   "藝術學群"      
## 
## $City
##  [1] "宜蘭縣" "花蓮縣" "金門縣" "南投縣" "屏東縣" "苗栗縣" "桃園市"
##  [8] "高雄市" "基隆市" "雲林縣" "新北市" "新竹市" "新竹縣" "嘉義市"
## [15] "嘉義縣" "彰化縣" "臺中市" "臺北市" "臺東縣" "臺南市" "澎湖縣"
## 
## $Region
## [1] "宜花東離島" "北北基"     "桃竹苗"     "中彰投"     "雲嘉南"    
## [6] "高屏澎"    
## 
## $EduLv
##  [1] "技術學院" "博士"     "碩士"     "普通大學" "科技大學" "五專"    
##  [7] "三專"     "二專"     "高中"     "高職"     "國中"    
## 
## $SubEduOver
## [1] "符合工作要求" "高於工作要求" "低於工作要求"
## 
## $ObjOver
## [1] "adequate" "over"     "under"

# Column names of the recoded data, in their current order.
colnames(dta)

##  [1] "SID"         "Gender"      "Sector"      "EduLv"       "SubEduOver" 
##  [6] "Require"     "Field1"      "City"        "Category"    "Staff"      
## [11] "Hours"       "J_year"      "J_total"     "income"      "SubMismatch"
## [16] "JobSat"      "EduZone"     "Region"      "Salary"      "Age"        
## [21] "JobZone"     "JobCor"      "Core"        "ObjOver"     "JobZone_D"

# Plotting subset: everything except City, income and JobSat.
p <- dta %>%
  dplyr::select(-c(City, income, JobSat))

Plot1

age

# Age distribution. The visible window is set with coord_cartesian() instead of
# scale limits: scale limits censor values outside the range, which drops any
# bar whose edge crosses a limit (the bar for age 20 starts at 19.55) and any
# bar taller than the y limit. The x window is widened by half a year so the
# outermost bars are shown in full.
ggplot(p, aes(x = Age)) +
  geom_bar() +
  scale_x_continuous(breaks = seq(20, 75, by = 5)) +
  scale_y_continuous(breaks = seq(0, 180, by = 20)) +
  coord_cartesian(xlim = c(19.5, 75.5), ylim = c(0, 180)) +
  labs(x = "年齡") +
  theme_bw()

salary

# Salary distribution. Salary is converted to a factor so that every distinct
# value gets its own bar; tick labels are tilted to keep them readable.
p %>%
  ggplot(aes(x = as.factor(Salary))) +
  geom_bar(position = position_dodge()) +
  xlab("Salary") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

Region

# Respondents per region. The raw count is printed above each bar and the
# share of the whole sample is printed in white just below the top of the bar.
p %>%
  ggplot(aes(x = Region)) +
  geom_bar(position = position_dodge()) +
  geom_text(
    aes(y = ..count.., label = ..count..),
    stat = "count",
    vjust = -.5
  ) +
  geom_text(
    aes(
      y = ..count..,
      label = sprintf("%.0f%%", round(..count../sum(..count..)*100))
    ),
    stat = "count",
    position = position_dodge(width = 0.9),
    colour = "white",
    vjust = 1
  ) +
  xlab("地區") +
  theme_bw() +
  theme(axis.text.x = element_text(hjust = 1))

# Study field broken down by region: stacked horizontal bars with each
# segment's count centred inside the segment.
dta %>%
  ggplot(aes(x = Field1, fill = Region)) +
  geom_bar() +
  geom_text(
    aes(y = ..count.., label = ..count..),
    stat = "count",
    position = position_stack(vjust = 0.5),
    size = 3
  ) +
  coord_flip() +
  xlab("Study Field")

Edu Level

# Respondents per education level, drawn as horizontal bars with the count
# printed just past the end of each bar.
p %>%
  ggplot(aes(x = EduLv)) +
  geom_bar(position = position_dodge()) +
  geom_text(
    aes(y = ..count.., label = ..count..),
    stat = "count",
    hjust = -.3
  ) +
  coord_flip() +
  xlab("學歷") +
  theme_bw() +
  theme(axis.text.x = element_text(hjust = 1))

Edu Zone

# Respondents per education zone, with the count centred above each bar.
p %>%
  ggplot(aes(x = EduZone)) +
  geom_bar(position = position_dodge()) +
  geom_text(
    aes(y = ..count.., label = ..count..),
    stat = "count",
    hjust = .5,
    vjust = -0.5
  ) +
  scale_x_continuous(breaks = seq(1, 5, by = 1), limits = c(0, 6)) +
  xlab("學歷級區") +
  theme_bw() +
  theme(axis.text.x = element_text(hjust = 1))

Study Field

# Respondents per study field as horizontal bars: the count is printed past
# the end of each bar and the share of the whole sample in white inside it.
p %>%
  ggplot(aes(x = Field1)) +
  geom_bar(position = position_dodge()) +
  geom_text(
    aes(y = ..count.., label = ..count..),
    stat = "count",
    hjust = -.3
  ) +
  geom_text(
    aes(
      y = ..count..,
      label = sprintf("%.0f%%", round(..count../sum(..count..)*100))
    ),
    stat = "count",
    position = position_dodge(width = .9),
    colour = "white",
    hjust = 1
  ) +
  coord_flip() +
  xlab("學群") +
  theme_bw() +
  theme(axis.text.x = element_text(hjust = 1))

Overeducation

# 
# Distribution of the education-zone minus job-zone gap (JobZone_D), with the
# number of respondents printed above each value.
p %>%
  ggplot(aes(x = JobZone_D)) +
  geom_histogram(binwidth = .5) +
  geom_text(
    aes(y = ..count.., label = ..count..),
    stat = "count",
    vjust = -.75
  ) +
  scale_x_continuous(breaks = seq(-4, 4, by = 1), limits = c(-5, 5)) +
  xlab("客評過量") +
  theme_bw()

# 
# JobZone_D within each education level, shown as within-panel proportions.
# Bars and percentage labels now both come from stat_count's `prop`. The
# previous density histogram (binwidth 0.5) drew bars at twice the labelled
# proportion, so bar heights and labels disagreed and the labels needed a
# large vertical offset to clear the bars.
ggplot(p, aes(x = JobZone_D)) +
  geom_bar(aes(y = ..prop..), width = .5) +
  geom_text(aes(label = scales::percent(..prop..), y = ..prop..),
            stat = "count", size = 3, vjust = -.5) +
  scale_x_continuous(limits = c(-5, 5), breaks = seq(-4, 4, by = 1)) +
  # Dashed reference line at a gap of zero.
  geom_vline(xintercept = 0, color = "gray", linetype = 2) +
  facet_wrap(~EduLv, nrow = 5) +
  labs(x = "客評過量", y = "百分比") +
  theme_bw()

#
# Self-assessed overeducation: bar height is the proportion of the sample
# (group = 1 makes the proportion relative to all respondents), and the label
# above each bar is the raw count.
p %>%
  ggplot(aes(x = factor(SubEduOver), y = ..prop.., group = 1)) +
  geom_bar() +
  geom_text(
    aes(y = ..prop.., label = ..count..),
    stat = "count",
    vjust = -.5
  ) +
  xlab("自評過量") +
  ylab("百分比") +
  theme_bw()

#
# Self-assessed overeducation within each education level: bar height is the
# proportion and the label above each bar is the raw count.
p %>%
  ggplot(aes(x = factor(SubEduOver), y = ..prop.., group = 1)) +
  geom_bar() +
  geom_text(
    aes(y = ..prop.., label = ..count..),
    stat = "count",
    size = 3,
    vjust = -.5
  ) +
  facet_wrap(~EduLv, nrow = 2) +
  xlab("自評過量") +
  ylab("百分比") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

Education mismatch

#
# Distribution of the JobCor rating (1 to 3), with the number of respondents
# printed above each value.
p %>%
  ggplot(aes(x = JobCor)) +
  geom_histogram(binwidth = .5) +
  geom_text(
    aes(y = ..count.., label = ..count..),
    stat = "count",
    vjust = -.75
  ) +
  scale_x_continuous(breaks = seq(1, 3, by = 1)) +
  xlab("客評關聯") +
  theme_bw()

#
# JobCor rating within each education level, shown as within-panel
# proportions. Bars and percentage labels now both come from stat_count's
# `prop`. The previous density histogram (binwidth 0.5) drew bars at twice the
# labelled proportion, so bar heights and labels disagreed.
ggplot(p, aes(x = JobCor)) +
  geom_bar(aes(y = ..prop..), width = .5) +
  geom_text(aes(label = scales::percent(..prop..), y = ..prop..),
            stat = "count", size = 3, vjust = -.5) +
  scale_x_continuous(breaks = seq(1, 3, by = 1)) +
  facet_wrap(~EduLv, nrow = 2) +
  labs(x = "客評關聯", y = "百分比") +
  theme_bw()

#
# Self-assessed field/job relatedness (SubMismatch): bar height is the
# proportion of the sample and the label above each bar is the raw count.
dta %>%
  ggplot(aes(x = factor(SubMismatch), y = ..prop.., group = 1)) +
  geom_bar() +
  geom_text(
    aes(y = ..prop.., label = ..count..),
    stat = "count",
    vjust = -.5
  ) +
  xlab("自評關聯") +
  ylab("百分比") +
  theme_bw()

#
# Self-assessed relatedness within each education level, with a dashed
# reference line at the third category on the x axis.
dta %>%
  ggplot(aes(x = factor(SubMismatch), y = ..prop.., group = 1)) +
  geom_bar() +
  geom_text(
    aes(y = ..prop.., label = ..count..),
    stat = "count",
    vjust = -.5,
    size = 3
  ) +
  geom_vline(xintercept = 3, colour = "gray", linetype = 2) +
  facet_wrap(~EduLv, nrow = 2) +
  xlab("自評關聯") +
  ylab("百分比") +
  theme_bw()

Plot2

Overeducation

# Mean salary (point) with one standard error either side (range) by
# self-assessed overeducation, one panel per study field. The dashed line marks
# the overall mean salary. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
ggplot(p, aes(SubEduOver, Salary)) +
  stat_summary(fun.data = mean_se, geom = "pointrange", na.rm = TRUE) +
  geom_hline(yintercept = mean(p$Salary), color = "gray", linetype = 2) +
  facet_wrap(~Field1, ncol = 7) +
  labs(x = "自評過量") +
  theme_bw() +
  theme(axis.text.x = element_text(size = 5, angle = 30),
        legend.position = "bottom")

# Mean salary (point) with one standard error either side (range) by ObjOver
# category, one panel per study field. The dashed line marks the overall mean.
#
# The 20000-80000 window is applied with coord_cartesian() so that it only
# zooms the view. Setting it as scale limits discards salaries outside the
# window before stat_summary() runs, which biases the plotted means (and
# na.rm hides the removal).
ggplot(p, aes(ObjOver, Salary)) +
  stat_summary(fun.data = mean_se, geom = "pointrange", na.rm = TRUE) +
  geom_hline(yintercept = mean(p$Salary), color = "gray", linetype = 2) +
  facet_wrap(~Field1, ncol = 7) +
  scale_y_continuous(breaks = seq(30000, 70000, by = 10000)) +
  coord_cartesian(ylim = c(20000, 80000)) +
  labs(x = "客評過量") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 30), legend.position = "bottom")

Education mismatch

# Mean salary (point) with one standard error either side (range) by
# self-assessed relatedness (SubMismatch), one panel per study field. The
# dashed line marks the overall mean.
#
# The 20000-80000 window is applied with coord_cartesian() so that it only
# zooms the view; as scale limits it would discard salaries outside the window
# before the means are computed. The x-axis label now names the variable that
# is actually plotted (self-assessed relatedness, not overeducation).
ggplot(p, aes(SubMismatch, Salary)) +
  stat_summary(fun.data = mean_se, geom = "pointrange", na.rm = TRUE) +
  geom_hline(yintercept = mean(p$Salary), color = "gray", linetype = 2) +
  facet_wrap(~Field1, ncol = 7) +
  scale_y_continuous(breaks = seq(30000, 70000, by = 10000)) +
  coord_cartesian(ylim = c(20000, 80000)) +
  labs(x = "自評關聯") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 30), legend.position = "bottom")

# Mean salary (point) with one standard error either side (range) by Core
# (the labelled JobCor rating), one panel per study field. The dashed line
# marks the overall mean.
#
# The 20000-80000 window is applied with coord_cartesian() so that it only
# zooms the view; as scale limits it would discard salaries outside the window
# before the means are computed. The x-axis label now names the variable that
# is actually plotted (relatedness, not overeducation).
ggplot(p, aes(Core, Salary)) +
  stat_summary(fun.data = mean_se, geom = "pointrange", na.rm = TRUE) +
  geom_hline(yintercept = mean(p$Salary), color = "gray", linetype = 2) +
  facet_wrap(~Field1, ncol = 7) +
  scale_y_continuous(breaks = seq(30000, 70000, by = 10000)) +
  coord_cartesian(ylim = c(20000, 80000)) +
  labs(x = "客評關聯") +
  theme_bw() +
  theme(axis.text.x = element_text(size = 7, angle = 30),
        legend.position = "bottom")

Scatter plot

Correlation

# Panel function for pairs(): prints the absolute correlation of x and y
# (computed on pairwise-complete observations) in the centre of the panel.
#
# x, y     numeric vectors supplied by pairs().
# digits   number of significant digits to print.
# prefix   text prepended to the printed value.
# cex.cor  optional character expansion for the label; when omitted the label
#          is drawn at the default text size.
# ...      accepted so pairs() can pass extra arguments; not used here.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Switch the panel to unit coordinates. par() returns the previous setting as
  # a named list, which is the form par() needs in order to restore it; passing
  # the bare numeric vector back restores nothing and raises a warning.
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)

  r <- abs(cor(x, y, use = "pairwise.complete.obs"))
  # Format next to a fixed reference value and keep only the first element.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)

  if (missing(cex.cor)) {
    text(0.5, 0.5, txt)
  } else {
    text(0.5, 0.5, txt, cex = cex.cor)
  }
}
# Scatterplot matrix of salary against the numeric work measures: smoothed
# scatter in the upper panels, absolute correlation in the lower panels.
# Columns are picked by name (the same six that positions 16 and 10:14 of `p`
# refer to) so the panels cannot silently change if the column order does.
pairs(p[, c("Salary", "Hours", "J_year", "J_total", "SubMismatch", "EduZone")],
      col = "lemonchiffon4", pch = ".",
      upper.panel = panel.smooth, lower.panel = panel.cor)

Work hours

工作時數對薪資的影響，看來在不同學歷間、不同自評關聯程度下可能有所不同。

# Log salary against working hours with a linear fit per panel. Every plot in
# this run shares the same data, axes and theme, so those are built once and
# each plot only adds its points, fit line, facetting variable and title.
hours_canvas <- ggplot(p, aes(x = Hours, y = log(Salary))) +
  theme_bw()

# Split by self-assessed overeducation.
hours_canvas +
  geom_point() +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~SubEduOver) +
  ggtitle("以自評過量區分")

# Split by ObjOver category.
hours_canvas +
  geom_point() +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~ObjOver) +
  ggtitle("以客評過量區分")

# Split by Core (labelled JobCor rating).
hours_canvas +
  geom_point() +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~Core) +
  ggtitle("以客評關聯區分")

# Split by self-assessed relatedness; smaller points for the denser panels.
hours_canvas +
  geom_point(size = .5) +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~SubMismatch) +
  ggtitle("以自評關聯區分")

# Split by education level; smaller points for the denser panels.
hours_canvas +
  geom_point(size = .5) +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~EduLv) +
  ggtitle("以學歷區分")

現職工作年資

工作年資無論是在自評、客評的過量及關聯下，對薪資都有類似的影響；反倒是在不同學歷間有不同的影響效果。

# Log salary against J_year with a linear fit per panel. Every plot in this
# run shares the same data, axes and theme, so those are built once and each
# plot only adds its points, fit line, facetting variable and title.
tenure_canvas <- ggplot(p, aes(x = J_year, y = log(Salary))) +
  theme_bw()

# Split by self-assessed overeducation.
tenure_canvas +
  geom_point() +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~SubEduOver) +
  ggtitle("以自評過量區分")

# Split by ObjOver category.
tenure_canvas +
  geom_point() +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~ObjOver) +
  ggtitle("以客評過量區分")

# Split by Core (labelled JobCor rating).
tenure_canvas +
  geom_point() +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~Core) +
  ggtitle("以客評關聯區分")

# Split by self-assessed relatedness; smaller points for the denser panels.
tenure_canvas +
  geom_point(size = .5) +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~SubMismatch) +
  ggtitle("以自評關聯區分")

# Split by education level; smaller points for the denser panels.
tenure_canvas +
  geom_point(size = .5) +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~EduLv) +
  ggtitle("以學歷區分")

## Warning in qt((1 - level)/2, df): NaNs produced

自評關聯

無論自評或客評的過量情形為何，自評關聯愈高者薪資也愈高；反倒是在不同學歷間，自評關聯對薪資的影響不一。

# Log salary against self-assessed relatedness (SubMismatch) with a linear fit
# per panel. The plots share the same data, axes and theme, so those are built
# once and each plot only adds its points, fit line, facetting variable and
# title.
mismatch_canvas <- ggplot(p, aes(x = SubMismatch, y = log(Salary))) +
  theme_bw()

# Split by self-assessed overeducation.
mismatch_canvas +
  geom_point() +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~SubEduOver) +
  ggtitle("以自評過量區分")

# Split by ObjOver category.
mismatch_canvas +
  geom_point() +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~ObjOver) +
  ggtitle("以客評過量區分")

# Split by education level; smaller points for the denser panels.
mismatch_canvas +
  geom_point(size = .5) +
  geom_smooth(method = "lm", colour = "black") +
  facet_wrap(~EduLv) +
  ggtitle("以學歷區分")

## Warning in qt((1 - level)/2, df): NaNs produced

overview-2

Li

2018年4月24日

Data

對照組設定

Plot1

age

salary

Region

Edu Level

Edu Zone

Study Field

Overeducation

Education mismatch

Plot2

Overeducation

Education mismatch

Scatter plot

Correlation

Work hours

現職工作年資

自評關聯