Most of the libraries aren’t used. I can’t remember which ones I used and which I didn’t, so I kept them all.
Query Glassdoor’s API
# To call Glassdoor's (GD) API you must first register with them to obtain
# a partner ID and key.
# Query parameters used:
#   format = json
#   t.p    (ID given by GD)
#   t.k    (key given by GD)
#   action = jobs-stats
#   jt     (jobTitle = "data scientist")
#   returnJobTitles / returnEmployers = true
# The query is passed as a named list so httr performs the URL encoding
# (space -> %20, "/" -> %2F) instead of it being hand-written into the URL.
# Credentials are taken from the GLASSDOOR_PARTNER_ID / GLASSDOOR_KEY
# environment variables when set; the previously embedded values are kept
# only as a fallback so the script behaves as before.
# NOTE(review): the fallback ID/key are secrets committed to source --
# consider rotating them and removing the defaults.
# RETURNS A RESPONSE OBJECT
url_employers <- GET(
  "http://api.glassdoor.com/api/api.htm",
  query = list(
    v = "1",
    format = "json",
    t.p = Sys.getenv("GLASSDOOR_PARTNER_ID", unset = "210321"),
    t.k = Sys.getenv("GLASSDOOR_KEY", unset = "iYOyroMnvHG"),
    action = "jobs-stats",
    userip = "138.207.165.107",
    useragent = "Mozilla/5.0",
    jt = "data scientist",
    returnJobTitles = "true",
    returnEmployers = "true"
  )
)
# Stop here with the HTTP status if the request failed, rather than failing
# later when the expected elements are missing from the parsed body.
stop_for_status(url_employers)
Parse the RESPONSE object into a list, then convert that list to JSON format.
# Decode the response body into a parsed R list, then serialise that list
# to JSON so it can be re-read in the next step.
emp_content <- url_employers %>%
  content(as = "parsed")
json_emp <- emp_content %>%
  toJSON()
Separate the JSON object into a list of data frame objects. Pull the data frames out of the list.
# Read the JSON back into R, then pull the employers and job-title tables
# out of the "response" element.
emp_list <- fromJSON(json_emp)
emp_df_employers <- emp_list[["response"]][["employers"]]
emp_df_jobtitle <- emp_list[["response"]][["jobTitles"]]
Convert all columns from type ‘list’ to type ‘character’. Then convert the columns you need from ‘chr’ to numeric.
# Employers data frame: coerce every column to character first, then turn
# the columns holding numbers into numeric.
emp_df_employers <- emp_df_employers %>%
  mutate_all(as.character) %>%
  mutate_at(c("id", "numJobs", "rating", "numberOfReviews"), as.numeric)
# Job-title data frame: coerce every column to character first, then turn
# the id and job-count columns into numeric.
emp_df_jobtitle <- emp_df_jobtitle %>%
  mutate_all(as.character) %>%
  mutate_at(c("id", "numJobs"), as.numeric)
Tidy
# Keep only the columns needed for the plot and give them display names in
# the same step (name -> Company, numJobs -> Jobs, rating -> Rating).
# The earlier `sort(emp$Company)` call was removed: it ran before the column
# had been renamed, so it sorted NULL (emitting a warning) and its result was
# never stored. Row order is left exactly as returned by the API.
emp <- emp_df_employers %>%
  select(Company = name, Jobs = numJobs, Rating = rating)
emp
## Company Jobs Rating
## 1 Amazon 662 3.6
## 2 Booz Allen Hamilton 491 3.7
## 3 Deloitte 419 3.8
## 4 IBM 356 3.5
## 5 Bank of America 278 3.5
## 6 Anthem 269 3.4
## 7 KPMG 255 3.8
## 8 J.P. Morgan 245 3.7
## 9 Citi 199 3.5
## 10 Maverick Trading 143 3.8
## 11 Capital One 136 3.8
## 12 Citibank 126 3.4
## 13 General Dynamics Information Technology 122 3.4
## 14 Kforce 104 3.2
## 15 UnitedHealth Group 101 3.3
## 16 Oracle 97 3.4
## 17 Walmart eCommerce 97 3.2
## 18 Walmart 96 3.2
## 19 Accenture 94 3.8
## 20 eBay 91 3.8
## 21 Aetna 88 3.3
# Bar chart of the number of data-science jobs per company.
# The title is now passed as `title =`; previously it was an unnamed argument
# to labs(), so it was not bound to the plot title. The y aesthetic is named
# explicitly instead of relying on position.
# NOTE(review): `colour` maps Rating to the bar outline only; switch to
# `fill = Rating` if the bar interiors are meant to show the rating.
p <- ggplot(emp, aes(x = Company, y = Jobs, colour = Rating)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = .5)) +
  labs(
    title = "Data Science Jobs per Company",
    subtitle = "From GlassDoor API",
    y = "Number of Jobs",
    x = "Company"
  )
# Render the ggplot object as an interactive plotly figure.
ggplotly(p)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
Write to file
# Save the cleaned employers and job-title tables as CSV files, pairing each
# table with its output path.
invisible(Map(
  function(tbl, path) write.csv(tbl, file = path),
  list(emp_df_employers, emp_df_jobtitle),
  c("GDemployers.csv", "GDjobtitle.csv")
))