# require(devtools)
# install_github('rCharts', 'ramnathv')
1. 高级人才招聘
Invalid .internal.selfref detected and fixed by taking a (shallow) copy of the data.table so that := can add this new column by reference. At an earlier point, this data.table has been copied by R (or been created manually using structure() or similar). Avoid key<-, names<- and attr<- which in R currently (and oddly) may copy the whole data.table. Use set* syntax instead to avoid copying: ?set, ?setnames and ?setattr. Also, in R<=v3.0.2, list(DT1,DT2) copied the entire DT1 and DT2 (R's list() used to copy named objects); please upgrade to R>v3.0.2 if that is biting. If this message doesn't help, please report to datatable-help so the root cause can be fixed.n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Invalid .internal.selfref detected and fixed by taking a (shallow) copy of the data.table so that := can add this new column by reference. At an earlier point, this data.table has been copied by R (or been created manually using structure() or similar). Avoid key<-, names<- and attr<- which in R currently (and oddly) may copy the whole data.table. Use set* syntax instead to avoid copying: ?set, ?setnames and ?setattr. Also, in R<=v3.0.2, list(DT1,DT2) copied the entire DT1 and DT2 (R's list() used to copy named objects); please upgrade to R>v3.0.2 if that is biting. If this message doesn't help, please report to datatable-help so the root cause can be fixed.It seems your data is too big for client-side DataTables. You may consider server-side processing: http://rstudio.github.io/DT/server.htmlIt seems your data is too big for client-side DataTables. You may consider server-side processing: http://rstudio.github.io/DT/server.html
4.智联招聘
k.zl.new%>%
.[,地址a:= paste("<a href='",地址1,"' target='blank' >链接1</a>",sep="")]%>%
.[,地址b:= paste("<a href='",地址2,"' target='blank' >链接2</a>",sep="")]%>%
select(.,-地址1,-地址2)%>%
datatable(., escape = FALSE,
caption = 'Table 3: 事业单位招聘信息汇总' ,options = list(
pageLength = 50, autoWidth = TRUE
))
```
---
title: "就业信息爬虫 V 0.92"
author: "马英辉"
output: 
  html_notebook: 
    code_folding: hide
    fig_caption: yes
    toc: true
    toc_float: true
    theme: lumen
---
```{r}
# require(devtools)
# install_github('rCharts', 'ramnathv')

```

```{r setup, include=FALSE}
  # Install any packages in `k.list` that are missing, then attach all of them.
  #
  # k.list: character vector of package names. Surrounding whitespace is
  #   stripped first, so an entry such as " dichromat" resolves to the real
  #   package instead of being treated as missing on every run.
  # Returns (invisibly) a list with one logical per entry of `k.list`:
  #   TRUE when the package was attached, FALSE otherwise. A warning names
  #   every package that could not be attached.
  loadpacks <- function(k.list) {
    k.pkgs <- trimws(as.character(k.list))
    k.new <- unique(k.pkgs[!(k.pkgs %in% installed.packages()[, "Package"])])
    if (length(k.new) > 0) {
      install.packages(k.new)
    }
    k.loaded <- lapply(k.pkgs, require, character.only = TRUE)
    # require() only returns FALSE on failure, so report failures explicitly.
    k.failed <- k.pkgs[!unlist(k.loaded)]
    if (length(k.failed) > 0) {
      warning("could not load package(s): ", paste(k.failed, collapse = ", "),
              call. = FALSE)
    }
    invisible(k.loaded)
  }  # auto-install and load packages
# Install (if needed) and attach every package the notebook uses.
# The order is kept as-is because attach order decides which package masks which.
loadpacks(c(
  "data.table", "ggplot2", "dplyr", "reshape2", "foreign", "readstata13",
  "doBy", "plm", "plotluck", "corrgram", "statmod", "gmnl", "AER", "plyr",
  "lubridate", "RSQLite", "gridExtra", "ggthemes", "rJava", "xlsx",
  "magrittr", "forcats", "grid", "RMySQL", "knitr", "plotly", "prettydoc",
  "DT", "dichromat", "RColorBrewer", "rCharts", "rvest"
))

library(devtools)
# install_github('lchiffon/REmap')
library(REmap)

# Base-graphics font family.
par(family="STKaiti")
#forcats::fct_infreq

# if (!requireNamespace("devtools", quietly = TRUE))
#   install.packages("devtools")
# 
# devtools::install_github("rstudio/htmltools")
# devtools::install_github("rstudio/shiny")
# devtools::install_github("rstudio/miniUI")
# devtools::install_github("rstudio/addinexamples") 

# Additional theme.
#
# Publication-style ggplot2 theme layered on theme_foundation().
#
# base_size:   base font size handed to theme_foundation().
# base_family: base font family handed to theme_foundation().
# Returns the foundation theme with the overrides below added to it.
theme_Publication <- function(base_size=14, base_family="STKaiti") {
  library(grid)
  library(ggthemes)

  k.foundation <- theme_foundation(base_size = base_size,
                                   base_family = base_family)

  k.overrides <- theme(
    # Titles and text
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
    text = element_text(),
    # Panel and plot surfaces without an outline
    panel.background = element_rect(colour = NA),
    plot.background = element_rect(colour = NA),
    panel.border = element_rect(colour = NA),
    # Axes
    axis.title = element_text(face = "bold", size = rel(1)),
    axis.title.y = element_text(angle = 90, vjust = 2),
    axis.title.x = element_text(vjust = -0.2),
    axis.text = element_text(),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(),
    # Grid: light major lines only
    panel.grid.major = element_line(colour = "#f0f0f0"),
    panel.grid.minor = element_blank(),
    # Legend placed under the plot
    legend.key = element_rect(colour = NA),
    legend.position = "bottom",
    legend.box = "horizontal", #legend.direction = "horizontal",
    legend.key.size = unit(0.2, "cm"),
    legend.spacing = unit(0, "cm"),
    legend.title = element_text(face = "italic"),
    plot.margin = unit(c(10, 5, 5, 5), "mm"),
    # Facet strips
    strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
    strip.text = element_text(face = "bold")
  )

  k.foundation + k.overrides
}

# Discrete fill scale built from a fixed nine-colour palette.
# `...` is forwarded unchanged to discrete_scale().
scale_fill_Publication <- function(...){
  library(scales)
  k.palette <- c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
                 "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")
  discrete_scale("fill", "Publication", manual_pal(values = k.palette), ...)
}

# Discrete colour scale built from a fixed nine-colour palette.
# `...` is forwarded unchanged to discrete_scale().
scale_colour_Publication <- function(...){
  library(scales)
  k.palette <- c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
                 "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")
  discrete_scale("colour", "Publication", manual_pal(values = k.palette), ...)
}

# Set the knitr chunk option default `echo` to TRUE.
knitr::opts_chunk$set(echo = TRUE)
```

```{r , include=FALSE}
# Data-download helper.
#
# Build an HTML link that embeds a data frame as a Base64-encoded CSV.
#
# x:        data frame to embed (defaults to mtcars).
# filename: file name placed in the link's `download` attribute.
# label:    visible link text.
# Returns one string holding an <a> element whose href is a
#   `data:text/csv;base64,...` URI; the CSV is written with CP936 encoding.
embed_data <- function(x = mtcars, filename = "file.csv", label = "数据下载") {

  # Create encoded Base64 datastream
  encode_data <- function(x) {
    # Stage the CSV in a temp file so nothing in the working directory is
    # overwritten, and remove it even when writing or encoding fails.
    k.tmp <- tempfile(fileext = ".csv")
    on.exit(unlink(k.tmp), add = TRUE)
    write.csv(x, k.tmp, fileEncoding = "CP936")
    sprintf("data:text/csv;base64,%s",
            openssl::base64_encode(paste0(readLines(k.tmp), collapse = "\n")))
  }

  # String result ready to be placed in rmarkdown
  paste0("<a download='", filename, "' href=", encode_data(x), ">", label, "</a>")

}
```
# 1. 高级人才招聘
```{r 目录信息爬取, include=FALSE}
# Scrape one index page of the `boshirencaizhaopin` listing on gaoxiaojob.com.
#
# k.num: page number inserted into the index_<k.num>.html URL.
# Returns a data.table with columns 日期, 性质, 题目, 链接 and k.link.
f.get <- function(k.num) {
  k.webname <- paste0("http://www.gaoxiaojob.com/zhaopin/boshirencaizhaopin/index_", k.num, ".html")
  web <- read_html(k.webname, encoding = 'cp936')

  # The title anchors carry both the target URL and the title text.
  k.anchor <- html_nodes(web, ".list_b_info h2 a")
  k.link <- html_attr(k.anchor, "href")
  k.title <- html_attr(k.anchor, "title")

  k.name <- html_text(html_nodes(web, ".list_b_info p a"))
  k.day <- html_text(html_nodes(web, ".list_b_date .day"))
  k.ym <- html_text(html_nodes(web, ".list_b_date .ym"))

  data.table(
    # "<ym>.<first two characters of day>"
    日期 = paste(k.ym, substr(k.day, 1, 2), sep = "."),
    性质 = k.name,
    题目 = k.title,
    # Markdown-style link next to the raw URL
    链接 = paste0("[Link](", k.link, ")"),
    k.link = k.link
  )
}

# k.a <- data.table(c(1:100))%>%apply(.,1,f.get)%>%
#  do.call(rbind.fill ,.)%>%
#  data.table()%>%
#  .[grepl("view.php",k.link)==F,] # 去掉无法采集的链接



```

```{r 8.16data, echo=FALSE}
### 2017年8月16日的数据库
# k.all.simple1 <- data.table(c(1:300))%>%apply(.,1,f.get)%>%
#   do.call(rbind.fill ,.)%>%
#   data.table()%>%
#   .[grepl("view.php",k.link)==F,] # 去掉无法采集的链接
# 
# k.all.simple2 <- data.table(c(301:400))%>%apply(.,1,f.get)%>%
#   do.call(rbind.fill ,.)%>%
#   data.table()%>%
#   .[grepl("view.php",k.link)==F,] # 去掉无法采集的链接
# 
# k.a <- rbind.fill(k.all.simple1,k.all.simple2)%>%
#   data.table()

```

```{r 网页具体信息, echo=FALSE}
# Scrape the `.article_fenlei` block of one detail page.
#
# k.webname: URL of the detail page.
# Returns a one-row data.table with one column per extracted field.
f.infor <- function(k.webname) {
  web <- read_html(k.webname, encoding = 'cp936')
  kk <- html_text(html_nodes(web, ".article_fenlei"))

  # Split on the line-break-plus-indent separator between fields.
  k.fields <- unlist(strsplit(kk, "\r\n        "))
  # Strip everything up to and including the first full-width colon.
  k.fields <- gsub("^.*?：", "", k.fields)
  # Remove the remaining line-break-plus-indent fragment.
  k.fields <- gsub(" \r\n      ", "", k.fields)

  data.table(t(k.fields))
}

```

```{r , echo=FALSE}
### 2017年8月16日的数据库
#爬取数据
# k.b1 <- k.a[1:3000,k.link]%>%data.table(.)%>%apply(.,1,f.infor)%>%do.call(rbind.fill ,.)%>%data.table()
# k.b2 <- k.a[3001:6216,k.link]%>%data.table(.)%>%apply(.,1,f.infor)%>%do.call(rbind.fill ,.)%>%data.table()
```

```{r 数据合并, include=FALSE}
# Load the stored data (the 2017-08-16 data, or a later update).
# fread() is given an iconv shell command that converts ./boshijob.csv from
# cp936 to UTF-8; 日期 is then parsed with ymd() and the first column dropped
# (presumably the row-name column written by write.csv() — confirm).
k.c <- fread("iconv -f cp936 -t UTF-8 ./boshijob.csv")%>%.[,日期:=ymd(日期)]%>%select(.,-1) #%>%.[日期!="2017-08-16"]

# # 合并数据
# k.b <- rbind.fill(k.b1,k.b2)%>%
#   data.table()
# names(k.b) <- c("发布时间","截止日期","所属省份","工作地点","高校分类","招聘人数","报名方式","需求学科")
# k.c <- data.table(k.a,k.b)
# 
# k.c[,所属省份:=gsub(" ","",所属省份)]%>%
#   .[,工作地点:=gsub(" ","",工作地点)]%>%
#   .[,日期:=gsub("([.])","",日期)]%>%
#   .[,日期:=ymd(日期)]
# 
# # write.csv(k.c,"boshijob.csv",fileEncoding = "cp936")
# 
# k.c%>%select(.,-k.link)%>%kable(., format = "markdown")
# 
# k.c%>%select(.,-k.link)
# 
# names(k.c)
```

```{r part1数据更新, include=FALSE}
# k.c is the stored data set from before this refresh.

# Scrape index pages 1-10 and keep only the entries that are new.
k.a.add <- lapply(1:10, f.get) %>%
  do.call(rbind.fill, .) %>%
  data.table() %>%
  .[!grepl("view.php", k.link), ] %>%  # drop links that cannot be scraped
  .[!(k.link %in% k.c[, k.link]), ]    # drop links that are already stored

if (nrow(k.a.add) == 0) {
  # Nothing new: carry the stored data forward unchanged.
  k.c.all <- k.c
} else {
  # Fetch the detail page of every new entry, one row of fields per entry.
  k.b.add <- lapply(k.a.add[, k.link], f.infor) %>%
    do.call(rbind.fill, .) %>%
    data.table()
  names(k.b.add) <- c("发布时间","截止日期","所属省份","工作地点","高校分类","招聘人数","报名方式","需求学科")

  # Join index and detail fields, strip spaces, and parse 日期 as a date.
  k.c.new <- data.table(k.a.add, k.b.add) %>%
    .[, 所属省份 := gsub(" ", "", 所属省份)] %>%
    .[, 工作地点 := gsub(" ", "", 工作地点)] %>%
    .[, 日期 := gsub("([.])", "", 日期)] %>%
    .[, 日期 := ymd(日期)]

  # New rows go on top of the stored ones.
  k.c.all <- rbind.fill(k.c.new, k.c) %>%
    data.table()
}

# k.c.all <-k.c.all[,.SD[1,],by=.(日期,题目)] remove duplicates


# Save the refreshed data
write.csv(k.c.all, "boshijob.csv", fileEncoding = "cp936")

# Fix the column order used by the report below.
k.c.all <- k.c.all %>%
  select(., 日期, 性质, 题目, 截止日期, 所属省份, 工作地点, 高校分类, 招聘人数, 报名方式, 需求学科, 链接, k.link)


```


```{r , echo=FALSE,fig.width=8, fig.height=5}
# k.c.all[,.N,by=所属省份]%>%.[order(-N)]%>%.[N>10]%>%remapC(.,maptype = "china",color = c("red","gold","yellow", "skyblue","white"),theme = get_theme("Bright"))%>%plot(.)


# Posting counts per province, month and category, largest first.
# copy() keeps the `:=` below from modifying k.c.all by reference, so the
# helper column 年月 does not leak into later tables and data.table does not
# warn about an invalid .internal.selfref.
k.pya <- copy(k.c.all) %>%
  .[, 年月 := ymd(paste0(substr(日期, 1, 7), "-01"))] %>%
  .[, .N, by = .(所属省份, 年月, 性质)] %>%
  .[order(-N)]


# Monthly counts for 北京, one line per 性质.
k.pya[所属省份 == "北京", ] %>%
  plot_ly(., x = ~年月, y = ~N, color = ~性质) %>%
  add_lines()

# k.pya[所属省份=="北京"&性质=="科研人才",]%>%.[order(-年月)]
# 
# 
# # non-postdoc
# k.c.all[grepl("博士后",题目)==F&grepl("中国科学院",题目)==F&所属省份=="北京",]%>%select(.,-k.link)%>%kable(., format = "markdown") #&性质=="科研人才"

# Table 1: all listings with an HTML link column. escape = FALSE lets the
# <a> tags render; the link is added to a copy so k.c.all stays unchanged.
copy(k.c.all) %>%
  .[, link := paste0("<a href='", k.link, "' target='blank' >链接</a>")] %>%
  select(., -链接, -k.link, -工作地点, -高校分类) %>%
  datatable(., escape = FALSE,
            caption = 'Table 1: 高端人才招聘信息汇总',
            options = list(pageLength = 50, autoWidth = TRUE))
```
```{r , echo=FALSE}
# Table 2: listings whose title contains "部", with an HTML link column.
k.c.all[grepl("部", 题目), ] %>%
  .[, link := paste0("<a href='", k.link, "' target='blank' >链接</a>")] %>%
  select(., -链接, -k.link) %>%
  datatable(
    .,
    escape = FALSE,
    caption = 'Table 2: 高端人才招聘信息汇总-部'
  )
```
# 2. 企业招聘

```{r , echo=FALSE}
## 2.1 目录信息爬取
# Scrape one index page of the `qiye` listing on gaoxiaojob.com.
#
# k.num: page number inserted into the index_<k.num>.html URL.
# Returns a data.table with columns 日期, 题目, 地点, 招聘人数, 截止时间,
#   链接 and k.link.
f.get.enter <- function(k.num) {
  k.webname <- paste0("http://www.gaoxiaojob.com/zhaopin/qiye/index_", k.num, ".html")
  web <- read_html(k.webname, encoding = 'cp936')

  # Date text with dots and parentheses removed.
  k.day <- web %>% html_nodes(".ltitle small") %>% html_text(.) %>% gsub("([.()])", "", .)
  # The title anchors carry both the title text and the target URL.
  k.anchor <- web %>% html_nodes(".ltitle a")
  k.title <- k.anchor %>% html_text(.) %>% gsub("([.])", "", .)
  k.link <- k.anchor %>% html_attr(., "href")
  k.link2 <- paste0("[Link](", k.link, ")")

  k.place <- web %>% html_nodes(".style2 .lcompany") %>% html_text(.)
  # Stored under its own name so the page-number argument is not overwritten.
  k.count <- web %>% html_nodes(".style2 .lsalary") %>% html_text(.)
  k.deadline <- web %>% html_nodes(".style2 .ltime") %>% html_text(.)

  data.table(日期 = k.day, 题目 = k.title, 地点 = k.place, 招聘人数 = k.count,
             截止时间 = k.deadline, 链接 = k.link2, k.link = k.link)
}

# Scrape one index page of the `yinhangzhaopinhui` listing on gaoxiaojob.com.
#
# k.num: page number inserted into the index_<k.num>.html URL.
# Returns a data.table with columns 日期, 题目, 地点, 招聘人数, 截止时间,
#   链接 and k.link.
f.get.finance <- function(k.num) {
  k.webname <- paste0("http://www.gaoxiaojob.com/zhaopin/yinhangzhaopinhui/index_", k.num, ".html")
  web <- read_html(k.webname, encoding = 'cp936')

  # Date text with dots and parentheses removed.
  k.day <- web %>% html_nodes(".ltitle small") %>% html_text(.) %>% gsub("([.()])", "", .)
  # The title anchors carry both the title text and the target URL.
  k.anchor <- web %>% html_nodes(".ltitle a")
  k.title <- k.anchor %>% html_text(.) %>% gsub("([.])", "", .)
  k.link <- k.anchor %>% html_attr(., "href")
  k.link2 <- paste0("[Link](", k.link, ")")

  k.place <- web %>% html_nodes(".style2 .lcompany") %>% html_text(.)
  # Stored under its own name so the page-number argument is not overwritten.
  k.count <- web %>% html_nodes(".style2 .lsalary") %>% html_text(.)
  k.deadline <- web %>% html_nodes(".style2 .ltime") %>% html_text(.)

  data.table(日期 = k.day, 题目 = k.title, 地点 = k.place, 招聘人数 = k.count,
             截止时间 = k.deadline, 链接 = k.link2, k.link = k.link)
}

# k.a <- data.table(c(1:100))%>%apply(.,1,f.get)%>%
#  do.call(rbind.fill ,.)%>%
#  data.table()%>%
#  .[grepl("view.php",k.link)==F,] # 去掉无法采集的链接



```

```{r , echo=FALSE} 
### 2017年8月16日的数据库
# k.a.en <- data.table(c(1:152))%>%apply(.,1,f.get.enter)%>%
#   do.call(rbind.fill ,.)%>%
#   data.table()%>%
#   .[grepl("view.php",k.link)==F,] # 去掉无法采集的链接
# 
# k.a.fin <- data.table(c(1:67))%>%apply(.,1,f.get.finance)%>%
#   do.call(rbind.fill ,.)%>%
#   data.table()%>%
#   .[grepl("view.php",k.link)==F,] # 去掉无法采集的链接
# 
# # 保存新数据
# write.csv(k.a.en,"enterprice.csv",fileEncoding = "cp936")
# write.csv(k.a.fin,"finance.csv",fileEncoding = "cp936")
```

```{r , echo=FALSE}
## 2.2 Data merge
# Load the stored data (the 2017-08-16 data, or a later update).
# fread() is given an iconv shell command that converts each file from cp936
# to UTF-8; the first column is then dropped (presumably the row-name column
# written by write.csv() — confirm).
k.a.en1 <- fread("iconv -f cp936 -t UTF-8 ./enterprice.csv")%>%select(.,-1) #%>%.[日期!="0816"]
k.a.fin1 <- fread("iconv -f cp936 -t UTF-8 ./finance.csv")%>%select(.,-1) #%>%.[日期!="0816"]
# # 合并数据
# k.b <- rbind.fill(k.b1,k.b2)%>%
#   data.table()
# names(k.b) <- c("发布时间","截止日期","所属省份","工作地点","高校分类","招聘人数","报名方式","需求学科")
# k.c <- data.table(k.a,k.b)
# 
# k.c[,所属省份:=gsub(" ","",所属省份)]%>%
#   .[,工作地点:=gsub(" ","",工作地点)]%>%
#   .[,日期:=gsub("([.])","",日期)]%>%
#   .[,日期:=ymd(日期)]
# 
# # write.csv(k.c,"boshijob.csv",fileEncoding = "cp936")
# 
# k.c%>%select(.,-k.link)%>%kable(., format = "markdown")
# 
# k.c%>%select(.,-k.link)
# 
# names(k.c)
```


```{r , echo=FALSE}
## 2.3 Data refresh
# k.a.en1 and k.a.fin1 are the stored data sets from before this refresh.

# Scrape index pages 1-10 of each listing and keep only the new entries.
k.a.en <- lapply(1:10, f.get.enter) %>%
  do.call(rbind.fill, .) %>%
  data.table() %>%
  .[!grepl("view.php", k.link), ] %>%
  .[!(k.link %in% k.a.en1[, k.link]), ]

k.a.fin <- lapply(1:10, f.get.finance) %>%
  do.call(rbind.fill, .) %>%
  data.table() %>%
  .[!grepl("view.php", k.link), ] %>%
  .[!(k.link %in% k.a.fin1[, k.link]), ]

# New rows go on top of the stored ones.
k.enter.new <- data.table(rbind.fill(k.a.en, k.a.en1))
k.fin.new <- data.table(rbind.fill(k.a.fin, k.a.fin1))


# Save the refreshed data
write.csv(k.enter.new, "enterprice.csv", fileEncoding = "cp936")
write.csv(k.fin.new, "finance.csv", fileEncoding = "cp936")


```
## 2.1 企业
```{r , echo=FALSE}
# Table 3: first 400 rows of the enterprise listings with an HTML link column.
# escape = FALSE lets the <a> tags render.
k.enter.new %>%
  head(., 400) %>%
  .[, link := paste0("<a href='", k.link, "' target='blank' >链接</a>")] %>%
  select(., -链接, -k.link) %>%
  datatable(., escape = FALSE,
            caption = 'Table 3: 企业招聘信息汇总',
            options = list(pageLength = 50, autoWidth = TRUE))
```
## 2.2 银行
```{r , echo=FALSE}
# Table 4: first 400 rows of the bank listings with an HTML link column.
# escape = FALSE lets the <a> tags render.
k.fin.new %>%
  head(., 400) %>%
  .[, link := paste0("<a href='", k.link, "' target='blank' >链接</a>")] %>%
  select(., -链接, -k.link) %>%
  datatable(., escape = FALSE,
            caption = 'Table 4: 银行招聘信息汇总',
            options = list(pageLength = 50, autoWidth = TRUE))

# %>%kable(., format = "markdown") 
```


# 3. 政府事业单位招聘

```{r , echo=FALSE}
## 3.1 目录信息爬取
# Scrape one index page of the `shiyedanwei` listing on gaoxiaojob.com.
#
# k.num: page number inserted into the index_<k.num>.html URL.
# Returns a data.table with columns 日期, 题目, 地点, 招聘人数, 截止时间,
#   链接 and k.link.
f.get.shiyedanwei <- function(k.num) {
  k.webname <- paste0("http://www.gaoxiaojob.com/zhaopin/shiyedanwei/index_", k.num, ".html")
  web <- read_html(k.webname, encoding = 'cp936')

  # Date text with dots and parentheses removed.
  k.day <- web %>% html_nodes(".ltitle small") %>% html_text(.) %>% gsub("([.()])", "", .)
  # The title anchors carry both the title text and the target URL.
  k.anchor <- web %>% html_nodes(".ltitle a")
  k.title <- k.anchor %>% html_text(.) %>% gsub("([.])", "", .)
  k.link <- k.anchor %>% html_attr(., "href")
  k.link2 <- paste0("[Link](", k.link, ")")

  k.place <- web %>% html_nodes(".style2 .lcompany") %>% html_text(.)
  # Stored under its own name so the page-number argument is not overwritten.
  k.count <- web %>% html_nodes(".style2 .lsalary") %>% html_text(.)
  k.deadline <- web %>% html_nodes(".style2 .ltime") %>% html_text(.)

  data.table(日期 = k.day, 题目 = k.title, 地点 = k.place, 招聘人数 = k.count,
             截止时间 = k.deadline, 链接 = k.link2, k.link = k.link)
}

# Scrape one index page of the `sydw/guanban` listing on gaoxiaojob.com.
#
# k.num: page number inserted into the index_<k.num>.html URL.
# Returns a data.table with columns 日期, 题目, 地点, 招聘人数, 截止时间,
#   链接 and k.link.
f.get.sydw <- function(k.num) {
  k.webname <- paste0("http://www.gaoxiaojob.com/zhaopin/sydw/guanban/index_", k.num, ".html")
  web <- read_html(k.webname, encoding = 'cp936')

  # Date text with dots and parentheses removed.
  k.day <- web %>% html_nodes(".ltitle small") %>% html_text(.) %>% gsub("([.()])", "", .)
  # The title anchors carry both the title text and the target URL.
  k.anchor <- web %>% html_nodes(".ltitle a")
  k.title <- k.anchor %>% html_text(.) %>% gsub("([.])", "", .)
  k.link <- k.anchor %>% html_attr(., "href")
  k.link2 <- paste0("[Link](", k.link, ")")

  k.place <- web %>% html_nodes(".style2 .lcompany") %>% html_text(.)
  # Stored under its own name so the page-number argument is not overwritten.
  k.count <- web %>% html_nodes(".style2 .lsalary") %>% html_text(.)
  k.deadline <- web %>% html_nodes(".style2 .ltime") %>% html_text(.)

  data.table(日期 = k.day, 题目 = k.title, 地点 = k.place, 招聘人数 = k.count,
             截止时间 = k.deadline, 链接 = k.link2, k.link = k.link)
}

# k.a <- data.table(c(1:100))%>%apply(.,1,f.get)%>%
#  do.call(rbind.fill ,.)%>%
#  data.table()%>%
#  .[grepl("view.php",k.link)==F,] # 去掉无法采集的链接



```

```{r , echo=FALSE}
### 2017年8月16日的数据库
# k.a.shiyedanwei <- data.table(c(1:665))%>%apply(.,1,f.get.shiyedanwei)%>%
#   do.call(rbind.fill ,.)%>%
#   data.table()%>%
#   .[grepl("view.php",k.link)==F,] # 去掉无法采集的链接
# 
# k.a.sydw <- data.table(c(1:128))%>%apply(.,1,f.get.sydw)%>%
#   do.call(rbind.fill ,.)%>%
#   data.table()%>%
#   .[grepl("view.php",k.link)==F,] # 去掉无法采集的链接
# 
# # 保存新数据
# write.csv(k.a.shiyedanwei,"shiyedanwei.csv",fileEncoding = "cp936")
# write.csv(k.a.sydw,"sydw.csv",fileEncoding = "cp936")
```

```{r , echo=FALSE}
## 3.2 Data merge
# Load the stored data (the 2017-08-16 data, or a later update).
# fread() is given an iconv shell command that converts each file from cp936
# to UTF-8; the first column is then dropped (presumably the row-name column
# written by write.csv() — confirm).
k.a.shiyedanwei1 <- fread("iconv -f cp936 -t UTF-8 ./shiyedanwei.csv")%>%select(.,-1) #%>%.[日期!="0816"]
k.a.sydw1 <- fread("iconv -f cp936 -t UTF-8 ./sydw.csv")%>%select(.,-1) #%>%.[日期!="0816"]
# # 合并数据
# k.b <- rbind.fill(k.b1,k.b2)%>%
#   data.table()
# names(k.b) <- c("发布时间","截止日期","所属省份","工作地点","高校分类","招聘人数","报名方式","需求学科")
# k.c <- data.table(k.a,k.b)
# 
# k.c[,所属省份:=gsub(" ","",所属省份)]%>%
#   .[,工作地点:=gsub(" ","",工作地点)]%>%
#   .[,日期:=gsub("([.])","",日期)]%>%
#   .[,日期:=ymd(日期)]
# 
# # write.csv(k.c,"boshijob.csv",fileEncoding = "cp936")
# 
# k.c%>%select(.,-k.link)%>%kable(., format = "markdown")
# 
# k.c%>%select(.,-k.link)
# 
# names(k.c)
```


```{r , echo=FALSE}
## 3.3 Data refresh
# k.a.shiyedanwei1 and k.a.sydw1 are the stored data sets from before this refresh.

# Scrape index pages 1-10 of each listing and keep only the new entries.
k.a.shiyedanwei <- lapply(1:10, f.get.shiyedanwei) %>%
  do.call(rbind.fill, .) %>%
  data.table() %>%
  .[!grepl("view.php", k.link), ] %>%
  .[!(k.link %in% k.a.shiyedanwei1[, k.link]), ]

k.a.sydw <- lapply(1:10, f.get.sydw) %>%
  do.call(rbind.fill, .) %>%
  data.table() %>%
  .[!grepl("view.php", k.link), ] %>%
  .[!(k.link %in% k.a.sydw1[, k.link]), ]

# New rows go on top of the stored ones.
k.shiyedanwei.new <- data.table(rbind.fill(k.a.shiyedanwei, k.a.shiyedanwei1))
k.sydw.new <- data.table(rbind.fill(k.a.sydw, k.a.sydw1))


# Save the refreshed data
write.csv(k.shiyedanwei.new, "shiyedanwei.csv", fileEncoding = "cp936")
write.csv(k.sydw.new, "sydw.csv", fileEncoding = "cp936")


```
## 3.1 事业单位
```{r , echo=FALSE}
# Table 5: first 2000 rows of the 事业单位 listings with an HTML link column.
# escape = FALSE lets the <a> tags render. Numbered 5 because Tables 3 and 4
# are already used by the enterprise and bank tables.
k.shiyedanwei.new %>%
  head(., 2000) %>%
  .[, link := paste0("<a href='", k.link, "' target='blank' >链接</a>")] %>%
  select(., -链接, -k.link) %>%
  datatable(., escape = FALSE,
            caption = 'Table 5: 事业单位招聘信息汇总',
            options = list(pageLength = 50, autoWidth = TRUE))
```
## 3.2 国有企业
```{r , echo=FALSE}
# Table 6: first 1000 rows of the 国有企业 listings with an HTML link column.
# escape = FALSE lets the <a> tags render. Numbered 6 because Tables 3 and 4
# are already used by the enterprise and bank tables.
k.sydw.new %>%
  head(., 1000) %>%
  .[, link := paste0("<a href='", k.link, "' target='blank' >链接</a>")] %>%
  select(., -链接, -k.link) %>%
  datatable(., escape = FALSE,
            caption = 'Table 6: 国有企业招聘信息汇总',
            options = list(pageLength = 50, autoWidth = TRUE))

# %>%kable(., format = "markdown") 
```
# 4. 智联招聘
```{r, echo=FALSE}
# Scrape one result page of the fixed sou.zhaopin.com search below.
#
# k.num: page number appended to the search URL as the `p` parameter.
# Returns a data.table with columns 职位名称, 公司名称, 职位月薪, 工作地点,
#   发布日期, 地址1, 地址2 and 备注.
f.get.zhilian <- function(k.num) {
  k.web <- "http://sou.zhaopin.com/jobs/searchresult.ashx?jl=%E5%8C%97%E4%BA%AC%2b%E4%B8%8A%E6%B5%B7%2b%E5%B9%BF%E5%B7%9E%2b%E6%B7%B1%E5%9C%B3%2b%E6%9D%AD%E5%B7%9E&sm=0&sf=0&st=99999&el=1&et=2&isadv=1&sg=5087d5641895447b9b6038b8379eee2f&p="
  web <- read_html(paste0(k.web, k.num))

  # Text or href of every node matching a selector, as a one-column data.table.
  k.text <- function(k.css) data.table(html_text(html_nodes(web, k.css)))
  k.href <- function(k.css) data.table(html_attr(html_nodes(web, k.css), "href"))

  k.t <- data.table(
    k.text(".zwmc a"),
    k.text(".gsmc a:nth-child(1)"),
    # The first match is dropped for these two selectors
    # (presumably a header cell — confirm against the page).
    k.text(".zwyx")[-1, ],
    k.text(".gzdd")[-1, ],
    k.text(".gxsj span"),
    k.href(".zwmc a"),
    k.href(".gsmc a:nth-child(1)"),
    k.text(".newlist_deatil_last")
  )

  names(k.t) <- c("职位名称","公司名称","职位月薪","工作地点","发布日期","地址1","地址2","备注")

  k.t
}
```

```{r, echo=FALSE}
# k.zl <- data.table(c(1:300))%>%apply(.,1,f.get.zhilian)%>%
#   do.call(rbind.fill ,.)%>%
#   data.table()
# 
# write.csv(k.zl,"zhilian.csv",fileEncoding = "utf-8")
```

```{r, echo=FALSE}
# Stored Zhilian listings; the first column is dropped
# (presumably the row-name column written by write.csv() — confirm).
k.zl0 <- fread("iconv -f utf-8 -t UTF-8 ./zhilian.csv") %>% select(., -1)

# A posting counts as already stored when its (职位名称, 公司名称) pair is
# present. Testing the two columns separately would discard every new posting
# from a company seen before, and every posting whose title matches any
# stored title.
k.zl0.key <- paste(k.zl0[["职位名称"]], k.zl0[["公司名称"]], sep = "\r")

# Scrape result pages 1-5 and keep only the new postings.
k.zl2 <- data.table(c(1:5)) %>%
  apply(., 1, f.get.zhilian) %>%
  do.call(rbind.fill, .) %>%
  data.table() %>%
  .[!(paste(`职位名称`, `公司名称`, sep = "\r") %in% k.zl0.key), ]

# New rows go on top of the stored ones.
k.zl.new <- rbind.fill(k.zl2, k.zl0) %>% data.table()


# Save the refreshed data
write.csv(k.zl.new, "zhilian.csv", fileEncoding = "utf-8")
```

```{r}
# Table 7: Zhilian listings with the two URL columns turned into HTML links.
# escape = FALSE lets the <a> tags render. The caption now names this table
# instead of repeating the 事业单位 caption.
k.zl.new %>%
  .[, 地址a := paste0("<a href='", 地址1, "' target='blank' >链接1</a>")] %>%
  .[, 地址b := paste0("<a href='", 地址2, "' target='blank' >链接2</a>")] %>%
  select(., -地址1, -地址2) %>%
  datatable(., escape = FALSE,
            caption = 'Table 7: 智联招聘信息汇总',
            options = list(pageLength = 50, autoWidth = TRUE))
```

```

