# 資料整理 (Data preparation) ----
# Load the raw survey responses; the first row of the CSV holds column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
dta <- read.csv(
  "C:/Users/USER/Desktop/EDU MIS/project-research/project-research_data_data.csv",
  header = TRUE
)
# reorder levels
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Field of study (學群): replace each group code with its name.
# Position i in `field_names` is the name for code i, so recode_factor()
# receives the same "code" = "name" pairs as before, in the same order.
field_names <- c(
  "資訊學群",
  "工程學群",
  "數理化學群",
  "地球與環境學群",
  "生物資源學群",
  "生命科學學群",
  "醫藥衛生學群",
  "社會與心理學群",
  "教育學群",
  "外語學群",
  "文史哲學群",
  "藝術學群",
  "大眾傳播學群",
  "建築與設計學群",
  "遊憩與運動學群",
  "法政學群",
  "財經學群",
  "管理學群"
)
# Named list "1" = ..., "2" = ..., passed on as the replacement arguments.
field_key <- as.list(setNames(field_names, seq_along(field_names)))
dta$學群 <- do.call(recode_factor, c(list(dta$學群), field_key))
#
# Salary bracket (薪資): derive two factors from the raw bracket code.
#   薪資  - bracket label
#   薪資m - amount assigned to the bracket (still a factor of digit strings)
#
# Both recodes are keyed on the codes "1".."21", so the raw column is captured
# before 薪資 is overwritten. Previously 薪資m was recoded from 薪資 after it
# had been relabelled; none of the keys matched the new labels, so the amounts
# were never applied.
# NOTE(review): assumes the raw column stores the codes 1-21 - confirm
# against the CSV.
salary_code <- dta$薪資

dta$薪資 <- recode_factor(salary_code,
  "1" = "2萬以下",
  "2" = "2-3萬以下",
  "3" = "3-4萬以下",
  "4" = "4-5萬以下",
  "5" = "5-6萬以下",
  "6" = "6-7萬以下",
  "7" = "7-8萬以下",
  "8" = "8-9萬以下",
  "9" = "9-10萬以下",
  "10" = "10-11萬以下",
  "11" = "11-12萬以下",
  "12" = "12-13萬以下",
  "13" = "13-14萬以下",
  "14" = "14-15萬以下",
  "15" = "15-16萬以下",
  "16" = "16-17萬以下",
  "17" = "17-18萬以下",
  "18" = "18-19萬以下",
  "19" = "19-20萬以下",
  "20" = "20-30萬以下",
  "21" = "30萬以上"
)

dta$薪資m <- recode_factor(salary_code,
  "1" = "20000",
  "2" = "25000",
  "3" = "35000",
  "4" = "45000",
  "5" = "55000",
  "6" = "65000",
  "7" = "75000",
  "8" = "85000",
  "9" = "95000",
  "10" = "105000",
  "11" = "115000",
  "12" = "125000",
  "13" = "135000",
  "14" = "145000",
  "15" = "155000",
  "16" = "165000",
  "17" = "175000",
  "18" = "185000",
  "19" = "195000",
  # NOTE(review): code 20 is the "20-30萬以下" bracket; 205000 continues the
  # +10000 pattern rather than sitting mid-bracket - confirm intended value.
  "20" = "205000",
  "21" = "350000"
)
# Fix the level order of school sector (公私立).
sector_levels <- c("國立(公立)", "私立", "國外學校")
dta$公私立 <- factor(dta$公私立, levels = sector_levels)

# Fix the level order of education (學歷).
education_levels <- c(
  "博士", "碩士", "普通大學", "科技大學", "技術學院",
  "五專", "二專", "高職", "高中", "軍警學校"
)
dta$學歷 <- factor(dta$學歷, levels = education_levels)
#
# Self-rated overqualification (自評過量): label the three responses, then
# store the factor's numeric codes back into the column.
# NOTE(review): as.numeric() on a factor yields level positions, which match
# the original 1/2/3 codes only if all three levels are kept - confirm.
overqual_factor <- recode_factor(
  dta$自評過量,
  "1" = "低於工作要求",
  "2" = "符合工作要求",
  "3" = "高於工作要求"
)
dta$自評過量 <- as.numeric(overqual_factor)
# Age: computed from birth year (出生年) relative to 2018, then invalid
# samples are removed.
reference_year <- 2018
dta$age <- reference_year - dta$出生年
summary(dta$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.00 26.00 30.00 31.28 35.00 70.00

# Sort ascending by age before dropping rows by position.
dta <- arrange(dta, age)
# NOTE(review): rows are removed by fixed position, so this depends on the
# exact row count and sort order; presumably these two rows are the invalid
# samples - confirm and consider an explicit condition.
rows_to_drop <- c(1486, 1487)
dta <- dta[-rows_to_drop, ]
# Plot ----
# Age frequency distribution: bar chart of respondent counts by age.
library(ggplot2)
ggplot(data = dta, mapping = aes(x = age)) +
  geom_bar(position = "dodge") +
  theme_bw() +
  xlab("年齡") +
  scale_x_continuous(
    limits = c(20, 75),
    breaks = seq(from = 20, to = 75, by = 5)
  ) +
  scale_y_continuous(
    limits = c(0, 140),
    breaks = seq(from = 0, to = 140, by = 20)
  )

# Age distribution as a density histogram (bin width 0.8).
ggplot(data = dta, mapping = aes(x = age)) +
  geom_histogram(mapping = aes(y = ..density..), binwidth = 0.8) +
  theme_bw() +
  xlab("年齡") +
  scale_x_continuous(
    limits = c(20, 75),
    breaks = seq(from = 20, to = 75, by = 5)
  )
## Warning: Removed 1 rows containing missing values (geom_bar).

# Self-rated overqualification (自評過量) by school sector (公私立):
# density histogram with one panel per sector.
ggplot(data = dta, mapping = aes(x = 自評過量)) +
  geom_histogram(mapping = aes(y = ..density..), binwidth = 0.5) +
  facet_wrap(~公私立) +
  theme_bw() +
  xlab("自評過量") +
  ylab("Density") +
  scale_x_continuous(breaks = 1:3)

# Column 自評學用 by school sector (公私立): density histogram with one
# panel per sector.
ggplot(data = dta, mapping = aes(x = 自評學用)) +
  geom_histogram(mapping = aes(y = ..density..), binwidth = 0.5) +
  facet_wrap(~公私立) +
  theme_bw() +
  xlab("自評學用") +
  ylab("Density")

# Self-rated overqualification (自評過量) by education level (學歷):
# density histogram, panels laid out in two rows.
ggplot(data = dta, mapping = aes(x = 自評過量)) +
  geom_histogram(mapping = aes(y = ..density..), binwidth = 0.5) +
  facet_wrap(~學歷, nrow = 2) +
  theme_bw() +
  xlab("自評過量") +
  ylab("Density") +
  scale_x_continuous(breaks = 1:3) +
  scale_y_continuous(
    limits = c(0, 2),
    breaks = seq(from = 0, to = 2, by = 0.3)
  )

# Column 自評學用 by education level (學歷): density histogram with one
# panel per education level.
ggplot(data = dta, mapping = aes(x = 自評學用)) +
  geom_histogram(mapping = aes(y = ..density..), binwidth = 0.5) +
  facet_wrap(~學歷) +
  theme_bw() +
  xlab("自評學用") +
  ylab("Density")
