市场薪酬调研小项目_电子工程师

R派模思

2022-04-08

前言

本次市场薪酬调研小项目示例的说明如下:

工具库准备

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Loading required package: sysfonts
## Loading required package: showtextdb

数据准备

## # A tibble: 6 × 9
##   company company_attribu… job   joblink location recruiter_name recruiter_title
##   <chr>   <chr>            <chr> <chr>   <chr>    <chr>          <chr>          
## 1 天邦达… 电子/芯片/半导…  高级… https:… 深圳-宝… 陈女士,游女士… 主管,综合管理,…
## 2 成都世… 电子/芯片/半导…  电子… https:… 成都,苏… 贺女士,徐女士… 人事经理,人力… 
## 3 深圳市… 电子/芯片/半导…  电子… https:… 杭州-滨… 周先生,朱女士… 招聘主管,招聘… 
## 4 山东英… 制药/生物工程,1… 数字… https:… 北京-海… 田女士,刘女士… 招聘专员,人力… 
## 5 苏州医… 电子设备,1-49人… 电子… https:… 杭州-萧… 沈先生,沈女士… 人事主管,行政… 
## 6 上海格… 制药/生物工程,…  电子… https:… 上海-浦… 王女士,王女士… 人事,招聘专员,…
## # … with 2 more variables: requirements <chr>, salary <chr>

建立第一张表即框架模板

检查其余第二行至第九行中各字段拆分后的数据长度是否一致

##  [1] "2"                     "com_len"               "40"                   
##  [4] "job_len"               "40"                    "joblink_len"          
##  [7] "40"                    "location_len"          "40"                   
## [10] "recruitment_name_len"  "40"                    "recruitment_title_len"
## [13] "37"                    "salary_len"            "40"                   
##  [1] "3"                     "com_len"               "40"                   
##  [4] "job_len"               "40"                    "joblink_len"          
##  [7] "40"                    "location_len"          "40"                   
## [10] "recruitment_name_len"  "40"                    "recruitment_title_len"
## [13] "36"                    "salary_len"            "40"                   
##  [1] "4"                     "com_len"               "40"                   
##  [4] "job_len"               "40"                    "joblink_len"          
##  [7] "40"                    "location_len"          "40"                   
## [10] "recruitment_name_len"  "40"                    "recruitment_title_len"
## [13] "29"                    "salary_len"            "40"                   
##  [1] "5"                     "com_len"               "40"                   
##  [4] "job_len"               "40"                    "joblink_len"          
##  [7] "40"                    "location_len"          "40"                   
## [10] "recruitment_name_len"  "40"                    "recruitment_title_len"
## [13] "39"                    "salary_len"            "40"                   
##  [1] "6"                     "com_len"               "40"                   
##  [4] "job_len"               "40"                    "joblink_len"          
##  [7] "40"                    "location_len"          "40"                   
## [10] "recruitment_name_len"  "40"                    "recruitment_title_len"
## [13] "38"                    "salary_len"            "40"                   
##  [1] "7"                     "com_len"               "40"                   
##  [4] "job_len"               "40"                    "joblink_len"          
##  [7] "40"                    "location_len"          "40"                   
## [10] "recruitment_name_len"  "40"                    "recruitment_title_len"
## [13] "30"                    "salary_len"            "40"                   
##  [1] "8"                     "com_len"               "40"                   
##  [4] "job_len"               "40"                    "joblink_len"          
##  [7] "40"                    "location_len"          "40"                   
## [10] "recruitment_name_len"  "40"                    "recruitment_title_len"
## [13] "33"                    "salary_len"            "40"                   
##  [1] "9"                     "com_len"               "40"                   
##  [4] "job_len"               "40"                    "joblink_len"          
##  [7] "40"                    "location_len"          "40"                   
## [10] "recruitment_name_len"  "40"                    "recruitment_title_len"
## [13] "32"                    "salary_len"            "40"

补齐缺失数据,使各列长度保持一致

# Expand rows 2-9 of `elec_engineer` into one record per posting and append
# them to `df`. Every cell read here is a comma-separated list, which is split
# into a character vector before the columns are bound together.
for (i in 2:9){
    com <- elec_engineer[i,1] %>% str_split(',') %>% unlist
    job <- elec_engineer[i,3] %>% str_split(',') %>% unlist
    joblink <- elec_engineer[i,4] %>% str_split(',') %>% unlist
    # The numeric job id is taken from the '<digits>.shtml' part of the link.
    jobid <- str_extract(joblink,'[0-9]*.shtml') %>% parse_number()
    location <- elec_engineer[i,5] %>% str_split(',') %>% unlist
    recruiter_name <- elec_engineer[i,6] %>% str_split(',') %>% unlist
    recruiter_title <- elec_engineer[i,7] %>% str_split(',') %>% unlist
    salary <- elec_engineer[i,9] %>% str_split(',') %>% unlist

    # Pad the title vector with '未提供' up to the number of postings in this
    # row so that cbind() receives columns of equal length.
    # recruiter_title1 is assigned on every iteration: when it was only set
    # inside an `if`, a row with a complete title list silently reused the
    # titles left over from the previous iteration.
    # NOTE(review): padding is appended at the tail, so if the missing title
    # belongs to a posting in the middle of the list, later titles shift
    # against recruiter_name -- confirm against the raw data.
    gap <- max(0, length(job) - length(recruiter_title))
    recruiter_title1 <- c(recruiter_title, rep('未提供', gap))

    df1 <- cbind(job,com,location,salary,joblink,jobid,recruiter_name,recruiter_title1)
    df <- rbind(df,df1)
}
df %>% glimpse
## Rows: 360
## Columns: 8
## $ job              <chr> "高级电子工程师", "电子工程师", "电力电子软件研发工程…
## $ com              <chr> "天邦达科技", "浙江巨磁智能技术有限公司", "光亚智能",…
## $ location         <chr> "深圳-宝安区", "长沙-岳麓区", "苏州-虎丘区", "东莞-望…
## $ salary           <chr> "15-25k", "15-30k", "15-35k·13薪", "15-35k·13薪", "15…
## $ joblink          <chr> "https://www.liepin.com/job/1914205188.shtml?d_sfrom=…
## $ jobid            <chr> "1914205188", "1937969367", "1945326843", "1945193363…
## $ recruiter_name   <chr> "陈女士", "游女士", "易先生", "易先生", "陈女士", "朱…
## $ recruiter_title1 <chr> "主管", "综合管理", "人力资源经理", "人力资源经理", "…

提取城市数据

# The city is the part of `location` before the first '-'
# (e.g. '深圳-宝安区' -> '深圳'); values without '-' are kept whole.
location_parts <- strsplit(df$location, '-')
df$city <- sapply(location_parts, function(parts) parts[[1]])
glimpse(df)
## Rows: 360
## Columns: 9
## $ job              <chr> "高级电子工程师", "电子工程师", "电力电子软件研发工程…
## $ com              <chr> "天邦达科技", "浙江巨磁智能技术有限公司", "光亚智能",…
## $ location         <chr> "深圳-宝安区", "长沙-岳麓区", "苏州-虎丘区", "东莞-望…
## $ salary           <chr> "15-25k", "15-30k", "15-35k·13薪", "15-35k·13薪", "15…
## $ joblink          <chr> "https://www.liepin.com/job/1914205188.shtml?d_sfrom=…
## $ jobid            <chr> "1914205188", "1937969367", "1945326843", "1945193363…
## $ recruiter_name   <chr> "陈女士", "游女士", "易先生", "易先生", "陈女士", "朱…
## $ recruiter_title1 <chr> "主管", "综合管理", "人力资源经理", "人力资源经理", "…
## $ city             <chr> "深圳", "长沙", "苏州", "东莞", "东莞", "深圳", "嘉兴…

计算平均月薪

# Salaries listed as '面议' carry no numeric range; substitute the placeholder
# '0-0' so the min/max split below works for every row. Those rows therefore
# end up with salaryMin, salaryMax and salaryAvg equal to 0.
ind <- which(df$salary == '面议')
value <- rep('0-0', length(ind))
df$salary <- replace(df$salary, ind, value)

# Keep only the text before '·' (drops suffixes such as '·13薪'), then split
# the remaining 'min-max' range on '-'.
salary_range <- sapply(strsplit(df$salary, '·'), '[[', 1)
salary_bounds <- str_split(salary_range, '-')

# Both bounds are scaled by 1000; parse_number() strips the trailing 'k' that
# follows the upper bound (e.g. '25k').
df$salaryMin <- as.numeric(sapply(salary_bounds, '[[', 1)) * 1000
df$salaryMax <- parse_number(sapply(salary_bounds, '[[', 2)) * 1000
df$salaryAvg <- (df$salaryMin + df$salaryMax) / 2
glimpse(df)
## Rows: 360
## Columns: 12
## $ job              <chr> "高级电子工程师", "电子工程师", "电力电子软件研发工程…
## $ com              <chr> "天邦达科技", "浙江巨磁智能技术有限公司", "光亚智能",…
## $ location         <chr> "深圳-宝安区", "长沙-岳麓区", "苏州-虎丘区", "东莞-望…
## $ salary           <chr> "15-25k", "15-30k", "15-35k·13薪", "15-35k·13薪", "15…
## $ joblink          <chr> "https://www.liepin.com/job/1914205188.shtml?d_sfrom=…
## $ jobid            <chr> "1914205188", "1937969367", "1945326843", "1945193363…
## $ recruiter_name   <chr> "陈女士", "游女士", "易先生", "易先生", "陈女士", "朱…
## $ recruiter_title1 <chr> "主管", "综合管理", "人力资源经理", "人力资源经理", "…
## $ city             <chr> "深圳", "长沙", "苏州", "东莞", "东莞", "深圳", "嘉兴…
## $ salaryMin        <dbl> 15000, 15000, 15000, 15000, 15000, 10000, 15000, 6000…
## $ salaryMax        <dbl> 25000, 30000, 35000, 35000, 30000, 15000, 25000, 1200…
## $ salaryAvg        <dbl> 20000, 22500, 25000, 25000, 22500, 12500, 20000, 9000…

移除重复记录,建立图表

# Keep the first occurrence of every job id. A logical mask is used because
# df[-which(duplicated(...)), ] returns zero rows when there are no
# duplicates (negative indexing with an empty vector selects nothing).
is_first_posting <- !duplicated(df$jobid)

# Per city: number of postings (n) and mean monthly salary. A salaryAvg of 0 is
# the placeholder for postings whose salary is '面议' (negotiable), so those
# rows still count towards n but are left out of the mean.
# The chart shows the six cities with the most postings: bars are the mean
# salary (left axis), red points are n scaled by 100 (read on the right axis).
# NOTE(review): the red count label is drawn at y = n*10 while the point sits
# at y = n*100 -- confirm whether the offset is intentional.
df[is_first_posting, ] %>%
    group_by(city) %>%
    summarize(n = n(), avg_salary = mean(salaryAvg[salaryAvg > 0])) %>%
    arrange(-n) %>%
    head %>%
    ggplot(aes(x = city)) +
    geom_col(aes(y = avg_salary)) +
    geom_point(aes(y = n*100), color = 'red') +
    geom_text(aes(label = n, x = city, y = n*10), color = 'red') +
    geom_text(aes(label = round(avg_salary, 0), x = city, y = avg_salary + 1e+3), color = 'green') +
    labs(y = '平均月薪', x = '城市', title = '电子工程师招聘需求最大的前六城市') +
    scale_y_continuous(sec.axis = sec_axis(~./100))

岗位要求分布

行业与公司规模

结论: