Scraping salary data for Booz Allen Hamilton (Washington, DC)
devtools::install_github("mguideng/gdscrapeR")
## Skipping install of 'gdscrapeR' from a github remote, the SHA1 (a8e5f935) has not changed since last install.
## Use `force = TRUE` to force installation
library(gdscrapeR)
## Warning: replacing previous import 'purrr::pluck' by 'rvest::pluck' when
## loading 'gdscrapeR'
library(rvest)
## Warning: package 'rvest' was built under R version 3.5.3
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.5.2
# Glassdoor salary listing page for Booz Allen Hamilton, Washington DC.
url <- "https://www.glassdoor.com/Salary/Booz-Allen-Hamilton-Washington-DC-Salaries-EI_IE2735.0,19_IL.20,33_IM911.htm"

# Download and parse the page once; every selector below queries this document.
webpage <- url %>% read_html()

# Select the job-title nodes and the salary-amount nodes by CSS class.
Name_data_html <- webpage %>%
  html_nodes(".salaryRow__JobInfoStyle__jobTitle")
salary_data_html <- webpage %>%
  html_nodes(".salaryRow__SalaryRowStyle__amt .strong")

# Extract the text of the title nodes and preview the first few entries.
name_data <- Name_data_html %>% html_text()
head(name_data)
## [1] "Associate" "Senior Consultant"
## [3] "Consultant" "Lead Associate"
## [5] "Senior Consultant Level II" "Consultant I"

# Extract the text of the salary nodes and preview the first ten entries.
salary_data <- salary_data_html %>% html_text()
head(salary_data, n = 10)
## [1] "$103,890" "$76,872" "$62,188" "$138,459" "$86,982" "$64,172"
## [7] "$170,890" "$125,848" "$77,341" "$140,736"
# Combine the scraped titles and salary strings into one data frame with one
# row per listing. Both columns are stored as character vectors, not factors.
#
# data.frame() silently recycles the shorter vector when one length evenly
# divides the other, which would pair titles with the wrong salaries if the
# two CSS selectors matched a different number of nodes. Fail fast instead.
if (length(name_data) != length(salary_data)) {
  stop(
    "Scraped ", length(name_data), " job titles but ",
    length(salary_data), " salaries; cannot pair them row-wise.",
    call. = FALSE
  )
}

# stringsAsFactors = FALSE keeps the columns as character on every R version,
# so no factor-to-character conversion pass is needed afterwards.
jobs_df <- data.frame(
  Title = name_data,
  Salary = salary_data,
  stringsAsFactors = FALSE
)