Scraping Salary data for Booz Allen

First, load the required packages and read the web page.

devtools::install_github("mguideng/gdscrapeR")
## Skipping install of 'gdscrapeR' from a github remote, the SHA1 (a8e5f935) has not changed since last install.
##   Use `force = TRUE` to force installation
library(gdscrapeR)
## Warning: replacing previous import 'purrr::pluck' by 'rvest::pluck' when
## loading 'gdscrapeR'
library(rvest)
## Warning: package 'rvest' was built under R version 3.5.3
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.5.2
# Glassdoor salary listing for Booz Allen Hamilton in Washington, DC.
url <- "https://www.glassdoor.com/Salary/Booz-Allen-Hamilton-Washington-DC-Salaries-EI_IE2735.0,19_IL.20,33_IM911.htm"
# Fetch the page and parse it so it can be queried with CSS selectors.
webpage <- read_html(x = url)

Then find the HTML nodes that hold the job titles and salaries, and extract their text into character vectors.

# Select the job-title nodes and the salary-amount nodes by CSS class.
Name_data_html <- webpage %>%
  html_nodes(css = ".salaryRow__JobInfoStyle__jobTitle")
salary_data_html <- webpage %>%
  html_nodes(css = ".salaryRow__SalaryRowStyle__amt .strong")

# Extract the text of the title nodes and preview the first six entries.
name_data <- Name_data_html %>% html_text()
head(name_data, n = 6L)
## [1] "Associate"                  "Senior Consultant"         
## [3] "Consultant"                 "Lead Associate"            
## [5] "Senior Consultant Level II" "Consultant I"
# Extract the text of the salary nodes and preview the first ten entries.
salary_data <- salary_data_html %>% html_text()
head(salary_data, n = 10L)
##  [1] "$103,890" "$76,872"  "$62,188"  "$138,459" "$86,982"  "$64,172" 
##  [7] "$170,890" "$125,848" "$77,341"  "$140,736"

Merge the two character vectors into a data frame.

# Titles and salaries are paired by position. If the two vectors differ in
# length the selectors did not match the same rows, so stop here instead of
# letting data.frame() recycle the shorter vector and mispair the values.
if (length(name_data) != length(salary_data)) {
  stop(
    "Scraped ", length(name_data), " titles but ", length(salary_data),
    " salaries; check the CSS selectors.",
    call. = FALSE
  )
}

# Build the table with character columns directly; stringsAsFactors = FALSE
# replaces the separate factor-to-character conversion pass.
jobs_df <- data.frame(
  Title = name_data,
  Salary = salary_data,
  stringsAsFactors = FALSE
)