library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows

Looking at the Glassdoor Data Science Job Postings scraped on 06/05/2020

data source

# Load the three regional Glassdoor scrapes. The files are comma-separated,
# so read them with read.csv(): read.csv2() assumes a decimal comma
# (dec = ","), which leaves columns written with a decimal point as text and
# can misread quoted values that contain commas.
temp1 <- read.csv("G:/My Documents/Data_Job_SF.csv")
temp2 <- read.csv("G:/My Documents/Data_Job_TX.csv")
temp3 <- read.csv("G:/My Documents/Data_Job_WA.csv")

# Stack the three files and keep only postings whose title contains
# "Data Scientist". str_detect() already returns a logical vector, so no
# comparison against TRUE is needed; NA titles are dropped by filter().
all_data <- bind_rows(temp1, temp2, temp3) %>%
  filter(str_detect(Job_title, "Data Scientist"))

# Rows x columns of the combined, filtered data.
dim(all_data)
## [1] 608  12

Data fields

# Render the column names of the combined postings data as a styled table.
kable_styling(kbl(names(all_data)))
x
Job_title
Company
State
City
Min_Salary
Max_Salary
Job_Desc
Industry
Rating
Date_Posted
Valid_until
Job_Type

Industry

# Number of postings per industry, most common first, as a styled table.
all_data %>%
  count(Industry, sort = TRUE) %>%
  kbl() %>%
  kable_styling()
Industry n
Information Technology 188
Business Services 128
86
Aerospace & Defense 56
Government 22
Biotech & Pharmaceuticals 18
Finance 17
Retail 17
Manufacturing 13
Oil, Gas, Energy & Utilities 13
Insurance 12
Accounting & Legal 9
Health Care 8
Media 8
Education 3
Consumer Services 2
Real Estate 2
Transportation & Logistics 2
Arts, Entertainment & Recreation 1
Construction, Repair & Maintenance 1
Non-Profit 1
Telecommunications 1

Descriptions

library(tidytext)
library(wordcloud)
## Loading required package: RColorBrewer
# One row per word of every job description. Each word keeps the posting's
# industry and `linenumber`, the posting's running index within that
# industry (row_number() inside the Industry group).
tidy_desc <- all_data %>%
  group_by(Industry) %>%
  mutate(linenumber = row_number()) %>%
  ungroup() %>%
  # Keep only the needed columns before tokenising; unnest_tokens() replaces
  # Job_Desc with the new `word` column.
  select(Industry, linenumber, Job_Desc) %>%
  unnest_tokens(word, Job_Desc)

# Stop-word list for the word clouds: three words that appear in nearly
# every posting (tagged with lexicon "custom"), followed by the tidytext
# `stop_words` table.
custom_stop_words <- tibble(
  word = c("data", "science", "experience"),
  lexicon = "custom"
) %>%
  bind_rows(stop_words)

# Information Technology postings: word cloud of the non-stop-words,
# limited to the top five by frequency.
it_df <- filter(tidy_desc, Industry == "Information Technology")

it_df %>%
  anti_join(custom_stop_words) %>%
  count(word) %>%
  {
    wordcloud(.$word, .$n, max.words = 5)
  }
## Joining, by = "word"

# Business Services postings: word cloud of the non-stop-words,
# limited to the top five by frequency.
bs_df <- filter(tidy_desc, Industry == "Business Services")

bs_df %>%
  anti_join(custom_stop_words) %>%
  count(word) %>%
  {
    wordcloud(.$word, .$n, max.words = 5)
  }
## Joining, by = "word"

# Aerospace & Defense postings: word cloud of the non-stop-words,
# limited to the top five by frequency.
ad_df <- filter(tidy_desc, Industry == "Aerospace & Defense")

ad_df %>%
  anti_join(custom_stop_words) %>%
  count(word) %>%
  {
    wordcloud(.$word, .$n, max.words = 5)
  }
## Joining, by = "word"

# Biotech & Pharmaceuticals postings: word cloud of the non-stop-words,
# limited to the top five by frequency.
bp_df <- filter(tidy_desc, Industry == "Biotech & Pharmaceuticals")

bp_df %>%
  anti_join(custom_stop_words) %>%
  count(word) %>%
  {
    wordcloud(.$word, .$n, max.words = 5)
  }
## Joining, by = "word"

+160k Data Related Jobs Listings Extracted From Glassdoor link

Limited to Data Scientists

# Load the larger Glassdoor extract and keep only postings whose title
# contains "Data Scientist". The file is comma-separated, so read it with
# read.csv(): read.csv2() assumes a decimal comma (dec = ","), which leaves
# decimal-point columns (e.g. map.lat, map.lng) as text and can misread
# quoted values that contain commas.
temp4 <- read.csv("G:/My Documents/glassdoor.csv") %>%
  filter(str_detect(header.jobTitle, "Data Scientist"))

# Rows x columns of the filtered extract.
dim(temp4)
## [1] 6184  163

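Country: harder to extract, because map.country mixes ISO codes, full country names, city names and inconsistent capitalisation, and is blank for many rows.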

# Number of postings for each distinct raw value of map.country,
# as a styled table.
temp4 %>%
  count(map.country) %>%
  kbl() %>%
  kable_styling()
map.country n
1726
ae 2
AE 34
AR 16
Argentina 5
AT 39
AU 41
Australia 44
Austria 24
Bangalore 4
BE 67
Belgium 8
Bengaluru 1
BG 3
BR 27
Brazil 7
ca 9
CA 139
CAN 2
Canada 37
CH 78
CHE 80
Chennai 1
China 16
CL 1
CN 6
CO 1
Costa Rica 2
Croatia 2
Czech Republic 2
de 4
DE 415
Denmark 11
Deutschland 5
DK 7
EE 1
EG 4
Egypt 3
England 8
es 2
ES 33
FI 1
Finland 8
FR 307
FRA 4
France 89
FRANCE 5
gb 1
GB 150
Germany 80
GR 1
Greece 11
HK 66
Hong Kong 50
HU 4
Hungary 4
ID 48
IE 24
IL 176
in 7
IN 363
India 67
Indonesia 4
IR 1
Ireland 23
Israel 38
it 1
IT 25
Italy 48
Japan 11
jp 2
JP 15
Kenya 3
Kingdom 20
KR 6
LT 2
lu 1
LU 6
Malaysia 5
Mexico 10
mx 1
MX 32
MY 39
Nederland 2
Netherlands 24
New Zealand 9
ng 1
NG 11
NL 261
NO 2
NZ 7
Pakistan 1
PH 49
Philippines 5
PK 5
PL 62
Poland 20
Portugal 5
PT 13
Romania 6
RS 2
RU 3
Russia 1
SA 1
Saudi Arabia 4
SE 15
Serbia 1
SG 162
SGP 2
SI 1
Singapore 42
SINGAPORE 2
Slovakia 2
South Africa 12
Sp 3
Spain 45
Sweden 48
Switzerland 13
Taiwan 1
TH 2
Thailand 3
TR 23
Turkey 6
TW 6
UA 7
UK 33
Ukraine 1
United Arab Emirates 10
United Kingdom 267
United States 46
UNITED STATES 4
Uruguay 1
us 5
US 191
USA 7
Vietnam 2
VN 6
ZA 45

Data fields: potentially a lot of uninteresting fields, but the data does include salary, industry and description.

# Render the column names of the larger Glassdoor extract as a styled table.
kable_styling(kbl(names(temp4)))
x
benefits.benefitRatingDecimal
benefits.comments
benefits.highlights
benefits.numRatings
benefits.employerSummary
breadCrumbs
gaTrackerData.category
gaTrackerData.empId
gaTrackerData.empName
gaTrackerData.empSize
gaTrackerData.expired
gaTrackerData.industry
gaTrackerData.industryId
gaTrackerData.jobId.long
gaTrackerData.jobId.int
gaTrackerData.jobTitle
gaTrackerData.location
gaTrackerData.locationId
gaTrackerData.locationType
gaTrackerData.pageRequestGuid.guid
gaTrackerData.pageRequestGuid.guidValid
gaTrackerData.pageRequestGuid.part1
gaTrackerData.pageRequestGuid.part2
gaTrackerData.sector
gaTrackerData.sectorId
gaTrackerData.profileConversionTrackingParams.trackingCAT
gaTrackerData.profileConversionTrackingParams.trackingSRC
gaTrackerData.profileConversionTrackingParams.trackingXSP
gaTrackerData.jobViewTrackingResult.jobViewDisplayTimeMillis
gaTrackerData.jobViewTrackingResult.requiresTracking
gaTrackerData.jobViewTrackingResult.trackingUrl
header.adOrderId
header.advertiserType
header.applicationId
header.applyButtonDisabled
header.applyUrl
header.blur
header.coverPhoto
header.easyApply
header.employerId
header.employerName
header.expired
header.gocId
header.hideCEOInfo
header.jobTitle
header.locId
header.location
header.locationType
header.logo
header.logo2x
header.organic
header.overviewUrl
header.posted
header.rating
header.saved
header.savedJobId
header.sgocId
header.sponsored
header.userAdmin
header.uxApplyType
header.featuredVideo
header.normalizedJobTitle
header.urgencyLabel
header.urgencyLabelForMessage
header.urgencyMessage
header.needsCommission
header.payHigh
header.payLow
header.payMed
header.payPeriod
header.salaryHigh
header.salaryLow
header.salarySource
job.description
job.discoverDate
job.eolHashCode
job.importConfigId
job.jobReqId.long
job.jobReqId.int
job.jobSource
job.jobTitleId
job.listingId.long
job.listingId.int
map.country
map.employerName
map.lat
map.lng
map.location
map.address
map.postalCode
overview.allBenefitsLink
overview.allPhotosLink
overview.allReviewsLink
overview.allSalariesLink
overview.foundedYear
overview.hq
overview.industry
overview.industryId
overview.revenue
overview.sector
overview.sectorId
overview.size
overview.stock
overview.type
overview.description
overview.mission
overview.website
overview.allVideosLink
overview.competitors
overview.companyVideo
photos
rating.ceo.name
rating.ceo.photo
rating.ceo.photo2x
rating.ceo.ratingsCount
rating.ceoApproval
rating.recommendToFriend
rating.starRating
reviews
salary.country.cc3LetterISO
salary.country.ccISO
salary.country.continent.continentCode
salary.country.continent.continentName
salary.country.continent.id
salary.country.continent.new
salary.country.countryFIPS
salary.country.currency.currencyCode
salary.country.currency.defaultFractionDigits
salary.country.currency.displayName
salary.country.currency.id
salary.country.currency.name
salary.country.currency.negativeTemplate
salary.country.currency.new
salary.country.currency.positiveTemplate
salary.country.currency.symbol
salary.country.currencyCode
salary.country.defaultLocale
salary.country.defaultName
salary.country.defaultShortName
salary.country.employerSolutionsCountry
salary.country.id
salary.country.longName
salary.country.major
salary.country.name
salary.country.new
salary.country.population
salary.country.shortName
salary.country.tld
salary.country.type
salary.country.uniqueName
salary.country.usaCentricDisplayName
salary.currency.currencyCode
salary.currency.defaultFractionDigits
salary.currency.displayName
salary.currency.id
salary.currency.name
salary.currency.negativeTemplate
salary.currency.new
salary.currency.positiveTemplate
salary.currency.symbol
salary.lastSalaryDate
salary.salaries
wwfu

Industry

# Number of postings per overview.industry value, most common first,
# as a styled table.
temp4 %>%
  count(overview.industry, sort = TRUE) %>%
  kbl() %>%
  kable_styling()
overview.industry n
1817
IT Services 466
Staffing & Outsourcing 466
Internet 408
Consulting 348
Computer Hardware & Software 336
Enterprise Software & Network Solutions 257
Biotech & Pharmaceuticals 181
Advertising & Marketing 136
Insurance Operators 119
Investment Banking & Asset Management 111
Accounting 107
Banks & Building Societies 105
Publishing 61
Research & Development 58
Healthcare Services & Hospitals 56
Aerospace & Defence 55
Energy 49
Video Games 48
Telecommunications Services 47
Electrical & Electronic Manufacturing 42
Lending 42
Transportation Management 42
Industrial Manufacturing 40
Cable, Internet & Telephone Providers 37
Financial Transaction Processing 35
Government Agencies 33
Financial Analytics & Research 31
Department, Clothing, & Shoe Shops 27
Transportation Equipment Manufacturing 27
Food & Drink Manufacturing 26
Travel Agencies 25
Consumer Products Manufacturing 22
Other Retail Shops 22
Chemical Manufacturing 20
Social Services 20
Architectural & Engineering Services 19
Education Training Services 19
Miscellaneous Manufacturing 19
Wholesale 19
Airlines 18
Gambling 18
Health, Beauty & Fitness 18
TV Broadcasting & Cable Networks 17
General Merchandise & Superstores 16
Insurance Agencies & Brokerages 16
Grocery Shops & Supermarkets 14
Colleges & Universities 13
Music Production & Distribution 13
Oil & Gas Exploration & Production 13
Utilities 12
Vehicle Dealers 12
Estate Agents 11
Hotel & Resorts 11
Sports & Recreation 11
Consumer Product Hire 10
Logistics & Supply Chain 10
Sporting Goods Shops 9
Beauty & Personal Accessories Shops 8
Consumer Electronics & Appliance Shops 8
Film Production & Distribution 8
Farm Support Services 6
Fast-Food & Quick-Service Restaurants 6
Healthcare Product Manufacturing 6
Mining 6
Camping & Caravan Parks 5
Catering & Food Service Contractors 5
Brokerage Services 4
Building & Construction 4
Express Delivery Services 4
Food & Beverage Shops 4
Home Furniture and Houseware Shops 4
Media & Entertainment Retail Shops 4
Oil & Gas Services 4
Security Services 4
Shipping 4
Ticket Sales 4
Car Hire 3
Charitable Foundations 3
Food Production 3
Pharmacies & Health Shops 3
Primary & Secondary Education 3
Vehicle Repair & Maintenance 3
Commercial Equipment Repair & Maintenance 2
Haulage 2
Legal 2
Membership Organisations 2
Metal & Mineral Manufacturing 2
Motor Vehicle Parts & Accessories Shops 2
Museums, Zoos & Amusement Parks 2
Pet & Pet Supply Shops 2
Removal Services 2
Bus & Coach Services 1
Casual Restaurants 1
Convenience Stores & Roadside Services 1
General Repair & Maintenance 1
Local Councils 1
News Outlets 1
Radio 1
Rail Freight 1
Telecommunications Manufacturing 1
Venture Capital & Private Equity 1