Data
# set the ref & differ from zones
str(dta)
## 'data.frame': 1837 obs. of 31 variables:
## $ SID : Factor w/ 1769 levels "A1","A10","A100",..: 1 110 218 328 438 438 512 523 534 544 ...
## $ Born : int 1992 1990 1989 1988 1987 1987 1987 1992 1993 1988 ...
## $ Gender : Factor w/ 2 levels "女","男": 1 2 2 1 2 2 2 1 1 1 ...
## $ Sector : Factor w/ 3 levels "私立","國外學校",..: 3 1 3 3 1 1 3 3 3 3 ...
## $ EduLv : Factor w/ 11 levels "二專","三專",..: 11 10 11 11 10 10 11 10 10 11 ...
## $ Field1 : Factor w/ 18 levels "大眾傳播學群",..: 12 2 12 12 7 7 2 9 3 2 ...
## $ Field2 : Factor w/ 178 levels "","13-經營與財務運作相關職類",..: 109 140 95 95 162 162 9 71 19 140 ...
## $ Job1 : Factor w/ 24 levels "","11-主管職類",..: 9 23 9 9 5 5 5 3 9 22 ...
## $ Job2 : Factor w/ 130 levels "","/","11-1000 高階主管",..: 23 80 23 23 12 12 12 7 23 71 ...
## $ Job3 : Factor w/ 464 levels "","11-1011.00 執行長(Chief Executives)",..: 136 336 143 141 74 74 74 33 136 305 ...
## $ City : Factor w/ 21 levels "宜蘭縣","花蓮縣",..: 20 8 17 7 18 18 8 18 4 12 ...
## $ Category : Factor w/ 4 levels "","自營者","受雇於公營機關",..: 3 3 4 3 4 4 4 4 3 4 ...
## $ Staff : Factor w/ 9 levels "","10-29人","100-199人",..: 2 8 5 8 9 9 9 9 7 6 ...
## $ Hours : num 48 40 40 50 40 40 40 50 40 50 ...
## $ J_year : Factor w/ 50 levels "1","1.5","10",..: 16 1 1 43 28 28 41 1 28 41 ...
## $ J_total : num 2 3 2 5 4 4 6 1 3 5 ...
## $ income : Factor w/ 21 levels "10-11萬以下",..: 13 14 11 16 14 14 17 14 16 18 ...
## $ SubEduOver : Factor w/ 3 levels "低於工作要求",..: 3 2 3 2 3 3 2 3 3 3 ...
## $ Require : Factor w/ 11 levels "二專","不需要接受學校正式教育",..: 11 11 10 11 10 10 5 10 10 10 ...
## $ SubMismatch : int 4 3 5 4 2 2 2 4 3 5 ...
## $ JobSat : int 6 6 6 6 4 4 5 5 3 5 ...
## $ Email : Factor w/ 1313 levels "","0901pin@gmail.com",..: 945 1053 829 1 1235 1235 1 1 1 1 ...
## $ EduZone : int 5 4 5 5 4 4 5 4 4 5 ...
## $ Region : Factor w/ 6 levels "中彰投","北北基",..: 6 5 1 4 2 2 5 2 1 4 ...
## $ Salary : num 20000 35000 25000 45000 35000 35000 55000 35000 45000 65000 ...
## $ Age : int 26 28 29 30 31 31 31 26 25 30 ...
## $ JobCode : Factor w/ 463 levels "","11-1011.00",..: 136 335 143 141 74 74 74 33 136 304 ...
## $ Field_JobCode: Factor w/ 1168 levels "13-經營與財務運作相關職類13-2011.01",..: 663 926 585 583 1070 1070 17 437 72 919 ...
## $ JobZone : int 4 2 4 4 4 4 4 4 4 2 ...
## $ 職業關聯 : Factor w/ 2 levels "核心職業","關連職業": 1 NA NA 1 2 2 1 NA NA NA ...
## $ JobAsso : int 3 1 3 3 2 2 3 1 1 1 ...
dta <- dta %>% mutate( Sector = relevel(Sector, ref = "私立"),
Region = relevel(Region, ref = "宜花東離島"),
EduLv = factor(EduLv, levels=c("國中","高職","高中", "二專",
"三專","五專","技術學院",
"科技大學","普通大學","碩士",
"博士","軍警學校" )),
J_year = as.numeric(J_year),
JobZone = as.numeric(JobZone),
EduZone = as.numeric(EduZone),
ZoneD = EduZone - JobZone) %>%
filter( Age < 75 & Age > 20)
# hunt for missing values by variables
apply(apply(dta, 1, is.na), 1, sum)
## SID Born Gender Sector EduLv
## 0 0 0 0 0
## Field1 Field2 Job1 Job2 Job3
## 0 0 0 0 0
## City Category Staff Hours J_year
## 1 0 0 0 0
## J_total income SubEduOver Require SubMismatch
## 0 0 0 0 0
## JobSat Email EduZone Region Salary
## 0 0 9 1 0
## Age JobCode Field_JobCode JobZone 職業關聯
## 0 0 0 169 1307
## JobAsso ZoneD
## 0 177
# pick out
names(dta)
## [1] "SID" "Born" "Gender" "Sector"
## [5] "EduLv" "Field1" "Field2" "Job1"
## [9] "Job2" "Job3" "City" "Category"
## [13] "Staff" "Hours" "J_year" "J_total"
## [17] "income" "SubEduOver" "Require" "SubMismatch"
## [21] "JobSat" "Email" "EduZone" "Region"
## [25] "Salary" "Age" "JobCode" "Field_JobCode"
## [29] "JobZone" "職業關聯" "JobAsso" "ZoneD"
p <- select(dta, -2, -c(7:10), -Email, -c(27:28), -30 )
# subject with missing values
# dta[which(is.na(dta$JobZone)), ]
Description analysis
age
ggplot(p , aes(x = Age))+
geom_bar(position="dodge")+
scale_x_continuous(limits=c(20,75), breaks=seq(20,75, by = 5))+
scale_y_continuous(limits=c(0,180), breaks=seq(0,180, by = 20))+
labs(x = "年齡")+
theme_bw()

salary
ggplot(p, aes(x = as.factor(Salary))) +
geom_bar(position="dodge")+
labs(x = "Salary") +
theme_bw() +
theme(axis.text.x = element_text(hjust = 1, angle =30))

Region
ggplot(p, aes(x = Region)) +
geom_bar(position="dodge")+
geom_text(stat = "count", aes(label = ..count.., y = ..count.. , vjust = -.5))+
geom_text(stat = "count", aes(y = ..count.., label = sprintf("%.0f%%", round(..count../sum(..count..)*100))),
position = position_dodge(width = 0.9), color = "white", vjust = 1)+
labs(x = "地區") +
theme_bw() +
theme(axis.text.x = element_text(hjust = 1))

Edu Level
ggplot(p, aes(x = EduLv)) +
geom_bar(position="dodge")+
geom_text(stat = "count", aes(label = ..count.., y = ..count.. , hjust = -.3))+
labs(x = "學歷") +
coord_flip() +
theme_bw() +
theme(axis.text.x = element_text(hjust = 1))

Edu Zone
ggplot(p, aes(x = EduZone)) +
geom_bar(position="dodge")+
geom_text(stat = "count", aes(label = ..count.., y = ..count.. , hjust =.5, vjust = -0.5))+
scale_x_continuous(limits=c(0,6), breaks=seq(1,5, by = 1))+
labs(x = "學歷級區") +
theme_bw() +
theme(axis.text.x = element_text(hjust = 1))
## Warning: Removed 9 rows containing non-finite values (stat_count).
## Warning: Removed 9 rows containing non-finite values (stat_count).

Study Field
ggplot(p, aes(x = Field1)) +
geom_bar(position="dodge")+
geom_text(stat = "count", aes(label = ..count.., y = ..count.. , hjust = -.3))+
geom_text(stat = "count", aes(y = ..count.., label = sprintf("%.0f%%", round(..count../sum(..count..)*100))),
position = position_dodge(width = .9), color = "white", hjust = 1)+
coord_flip() +
labs(x = "學群") +
theme_bw() +
theme(axis.text.x = element_text(hjust = 1))

Difference between Edu&Job zone
# 軍警學校為NA因此不列入繪圖
#
ggplot(data= filter(p, ZoneD!="軍警學校"), aes(x = ZoneD))+
geom_histogram( binwidth = .5)+
geom_text(stat = "count", aes(label = ..count.., y = ..count.. , vjust = -.75))+
scale_x_continuous(limits=c(-5,5), breaks=seq(-4,4, by = 1))+
labs(x = "客評過量")+
theme_bw()

#
ggplot(data= filter(p, ZoneD!="軍警學校"), aes(x =ZoneD))+
geom_histogram(aes(y =..density..), binwidth = .5)+
geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", size = 2.5, vjust = -2) +
scale_x_continuous(limits=c(-5,5), breaks=seq(-4,4, by = 1))+
geom_vline(xintercept = 0, color = "gray", linetype = 2) +
facet_wrap(~EduLv, nrow = 5)+
labs(x = "客評過量")+
theme_bw()

Corelation between Edu&Job
#軍警學校為NA因此不列入繪圖
#
ggplot(data= filter(p, ZoneD!="軍警學校"), aes(x = JobAsso))+
geom_histogram( binwidth = .5)+
geom_text(stat = "count", aes(label = ..count.., y = ..count.. , vjust = -.75))+
scale_x_continuous(breaks=seq(1,3, by = 1))+
labs(x = "客評關聯")+
theme_bw()

#
ggplot(data= filter(p, ZoneD!="軍警學校"), aes(x = JobAsso))+
geom_histogram(aes(y =..density..), binwidth = .5)+
geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", size = 3, vjust = -2) +
scale_x_continuous(breaks=seq(1,3, by = 1))+
labs(x = "客評關聯")+
facet_wrap(~EduLv, nrow = 2)+
theme_bw()
