GlassDoorAPI

Most of the loaded libraries aren’t actually used. I couldn’t remember which ones I needed and which I didn’t, so I kept them all.

Query Glassdoor’s API

# To call Glassdoor's (GD) API you must register with them.
# Parameters used are:
#   format = json
#   t.p    (ID given by GD)
#   t.k    (Key given by GD)
#   jt     (jobTitle = "data scientist")
# The remaining query parameters (v, action, userip, useragent,
# returnJobTitles, returnEmployers) are passed through unchanged.
#
# NOTE(review): the partner ID and key are hard-coded in this script; consider
# loading them from environment variables before sharing the file.

# RETURNS A RESPONSE OBJECT
# The query is supplied as a named list so that httr percent-encodes each
# value (e.g. the space in "data scientist") instead of relying on a
# hand-encoded URL string.
url_employers <- GET(
  "http://api.glassdoor.com/api/api.htm",
  query = list(
    v = "1",
    format = "json",
    t.p = "210321",
    t.k = "iYOyroMnvHG",
    action = "jobs-stats",
    userip = "138.207.165.107",
    useragent = "Mozilla/5.0",
    jt = "data scientist",
    returnJobTitles = "true",
    returnEmployers = "true"
  )
)

# Fail here with a clear error on an HTTP 4xx/5xx response rather than
# letting an error payload flow into the parsing steps below.
stop_for_status(url_employers)

Parse the body of the RESPONSE object into a list, then convert that list to JSON format.

# Parse the body of the HTTP response into an R list.
emp_content <- url_employers %>%
  content(as = "parsed")

# Serialise the parsed list to a JSON string.
json_emp <- emp_content %>%
  toJSON()

Separate the JSON object into a list of data frame objects. Pull the data frames out of the list.

# Read the JSON string back in as a list.
emp_list <- json_emp %>%
  fromJSON()

# Pull the employers and job-title tables out of the `response` element.
emp_df_employers <- emp_list[["response"]][["employers"]]
emp_df_jobtitle <- emp_list[["response"]][["jobTitles"]]

Convert all columns from type ‘list’ to type ‘character’. Then convert the columns you need from ‘chr’ to ‘num’ (numeric).

# Employers data frame: turn every column into character first, then convert
# the numeric fields to numbers.
emp_df_employers <- emp_df_employers %>%
  mutate_all(as.character) %>%
  mutate(
    id = as.numeric(id),
    numJobs = as.numeric(numJobs),
    rating = as.numeric(rating),
    numberOfReviews = as.numeric(numberOfReviews)
  )

# Job-title data frame: turn every column into character first, then convert
# the numeric fields to numbers.
emp_df_jobtitle <- emp_df_jobtitle %>%
  mutate_all(as.character) %>%
  mutate(
    id = as.numeric(id),
    numJobs = as.numeric(numJobs)
  )

Tidy

# Keep only the columns needed for the plot and give them display-friendly
# names as part of the selection.
emp <- emp_df_employers %>%
  select(Company = name, Jobs = numJobs, Rating = rating)

# Print the company names in sorted order. This has to happen after the
# rename: before it the column is still called `name`, so `emp$Company` is
# NULL and sort() only produces a warning and NULL.
sort(emp$Company)

# Print the tidied table.
emp

##                                    Company Jobs Rating
## 1                                   Amazon  662    3.6
## 2                      Booz Allen Hamilton  491    3.7
## 3                                 Deloitte  419    3.8
## 4                                      IBM  356    3.5
## 5                          Bank of America  278    3.5
## 6                                   Anthem  269    3.4
## 7                                     KPMG  255    3.8
## 8                              J.P. Morgan  245    3.7
## 9                                     Citi  199    3.5
## 10                        Maverick Trading  143    3.8
## 11                             Capital One  136    3.8
## 12                                Citibank  126    3.4
## 13 General Dynamics Information Technology  122    3.4
## 14                                  Kforce  104    3.2
## 15                      UnitedHealth Group  101    3.3
## 16                                  Oracle   97    3.4
## 17                       Walmart eCommerce   97    3.2
## 18                                 Walmart   96    3.2
## 19                               Accenture   94    3.8
## 20                                    eBay   91    3.8
## 21                                   Aetna   88    3.3

# Bar chart of the number of jobs per company, with the rating mapped to the
# colour aesthetic.
# NOTE(review): on bars `colour` only affects the outline; use `fill = Rating`
# if the bars themselves should be shaded by rating.
p <- ggplot(emp, aes(x = Company, y = Jobs, colour = Rating)) +
  geom_bar(position = "dodge", stat = "identity") +
  # Slant the company names so that neighbouring labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = .5)) +
  # The title has to be passed by name: an unnamed string given to labs() is
  # not treated as the plot title.
  labs(
    title = "Data Science Jobs per Company",
    subtitle = "From GlassDoor API",
    y = "Number of Jobs",
    x = "Company"
  )

# Convert the static ggplot object into an interactive plotly figure.
ggplotly(p)

## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Write to file

# Save the cleaned employers table to a CSV file in the working directory.
write.csv(
  emp_df_employers,
  "GDemployers.csv"
)

# Save the cleaned job-title table to a CSV file in the working directory.
write.csv(
  emp_df_jobtitle,
  "GDjobtitle.csv"
)

GlassDoorAPI

Chad Smith

October 17, 2017