An Analysis of Annual Salaries of Data Science-Related Jobs

library(chromote)
library(htmltools)
library(rvest)
library(readxl)
library(plotly)
library(leaflet)
library(sf)
library(ggplot2)
library(maps)
library(RColorBrewer) # lots of color palettes for these kind of charts
library(data.table) # for sorting by key
library(mapproj) #coord_maps() needed this
#library(lattice)
#library(vegalite)
library(tidyverse)
#library(leaflet.minicharts)
library(magrittr)
library(usmap)

For this analysis, I will compare the average salaries of top Data Science-related job roles, including Data Scientist, Data Analyst, Data Engineer, Big Data Engineer, Data Manager, Data Architect, Data Visualization Engineer, Machine Learning Engineer, and Business Analyst. Additionally, I will compare the salary of each role by state. Data for this analysis was obtained from ZipRecruiter.

Obtaining Data for Each Job Role

Data Scientist

# Scrape ZipRecruiter's salary-by-state table for the Data Scientist role and
# reduce it to State + numeric DS_Annual_Salary.
uri <- r"(https://www.ziprecruiter.com/Salaries/What-Is-the-Average-DATA-Scientist-Salary-by-State)"

# Open a Chrome session through chromote and override its user agent with a
# desktop Chrome string before requesting the page.
b <- ChromoteSession$new()
b$Network$setUserAgentOverride(userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")

## named list()

# Navigation and the load-event wait are kept in one braced expression so they
# are evaluated together as a single top-level expression.
{
  b$Page$navigate(uri)
  b$Page$loadEventFired()
}

## $timestamp
## [1] 577655.2

# Pull the fully rendered document out of the browser as one HTML string.
resp <- b$Runtime$evaluate("document.querySelector('html').outerHTML")$result$value

# The HTML is now held in `resp`; close the session so it does not stay open
# for the rest of the run.
invisible(b$close())

salary_node <- resp %>%
  read_html() %>%
  html_element(".salary_by_state_table")

# When the table is absent html_text() gives NA, and `if (NA)` is an error.
# isTRUE() turns that case into FALSE so the empty fallback is actually used.
df_data_scientist <- if (isTRUE(nchar(html_text(salary_node)) > 100)) {
  html_table(salary_node)
} else {
  tibble(State = character(), `Annual Salary` = numeric())
}

df_main <- df_data_scientist %>%
  select(State, `Annual Salary`) %>%
  mutate(
    across(
      .cols = c("Annual Salary"),
      # parse_number() only accepts character vectors; as.character() keeps the
      # numeric fallback column (and any pre-converted column) from failing.
      .fns = function(x) parse_number(as.character(x))
    )
  ) %>%
  rename(DS_Annual_Salary = `Annual Salary`)

Data Analyst

# Scrape ZipRecruiter's salary-by-state table for the Data Analyst role and
# reduce it to State + numeric Data_Analyst_Annual_Salary.
uri <- r"(https://www.ziprecruiter.com/Salaries/What-Is-the-Average-Data-Analyst-Salary-by-State)"

# Open a Chrome session through chromote and override its user agent with a
# desktop Chrome string before requesting the page.
b <- ChromoteSession$new()
b$Network$setUserAgentOverride(userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")

## named list()

# Navigation and the load-event wait are kept in one braced expression so they
# are evaluated together as a single top-level expression.
{
  b$Page$navigate(uri)
  b$Page$loadEventFired()
}

## $timestamp
## [1] 577655.8

# Pull the fully rendered document out of the browser as one HTML string.
resp <- b$Runtime$evaluate("document.querySelector('html').outerHTML")$result$value

# The HTML is now held in `resp`; close the session so it does not stay open
# for the rest of the run.
invisible(b$close())

salary_node <- resp %>%
  read_html() %>%
  html_element(".salary_by_state_table")

# When the table is absent html_text() gives NA, and `if (NA)` is an error.
# isTRUE() turns that case into FALSE so the empty fallback is actually used.
df_data_analyst <- if (isTRUE(nchar(html_text(salary_node)) > 100)) {
  html_table(salary_node)
} else {
  tibble(State = character(), `Annual Salary` = numeric())
}

df_data_analyst2 <- df_data_analyst %>%
  select(State, `Annual Salary`) %>%
  mutate(
    across(
      .cols = c("Annual Salary"),
      # parse_number() only accepts character vectors; as.character() keeps the
      # numeric fallback column (and any pre-converted column) from failing.
      .fns = function(x) parse_number(as.character(x))
    )
  ) %>%
  rename(Data_Analyst_Annual_Salary = `Annual Salary`)

Big Data Engineer

# Scrape ZipRecruiter's salary-by-state table for the Big Data Engineer role
# and reduce it to State + numeric Big_Data_Engineer_Annual_Salary.
uri <- r"(https://www.ziprecruiter.com/Salaries/What-Is-the-Average-BIG-DATA-Engineer-Salary-by-State)"

# Open a Chrome session through chromote and override its user agent with a
# desktop Chrome string before requesting the page.
b <- ChromoteSession$new()
b$Network$setUserAgentOverride(userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")

## named list()

# Navigation and the load-event wait are kept in one braced expression so they
# are evaluated together as a single top-level expression.
{
  b$Page$navigate(uri)
  b$Page$loadEventFired()
}

## $timestamp
## [1] 577656.5

# Pull the fully rendered document out of the browser as one HTML string.
resp <- b$Runtime$evaluate("document.querySelector('html').outerHTML")$result$value

# The HTML is now held in `resp`; close the session so it does not stay open
# for the rest of the run.
invisible(b$close())

salary_node <- resp %>%
  read_html() %>%
  html_element(".salary_by_state_table")

# When the table is absent html_text() gives NA, and `if (NA)` is an error.
# isTRUE() turns that case into FALSE so the empty fallback is actually used.
df_big_data_engineer <- if (isTRUE(nchar(html_text(salary_node)) > 100)) {
  html_table(salary_node)
} else {
  tibble(State = character(), `Annual Salary` = numeric())
}

df_big_data_engineer2 <- df_big_data_engineer %>%
  select(State, `Annual Salary`) %>%
  mutate(
    across(
      .cols = c("Annual Salary"),
      # parse_number() only accepts character vectors; as.character() keeps the
      # numeric fallback column (and any pre-converted column) from failing.
      .fns = function(x) parse_number(as.character(x))
    )
  ) %>%
  rename(Big_Data_Engineer_Annual_Salary = `Annual Salary`)

Machine Learning Engineer

# Scrape ZipRecruiter's salary-by-state table for the Machine Learning Engineer
# role and reduce it to State + numeric ML_Engineer_Annual_Salary.
uri <- r"(https://www.ziprecruiter.com/Salaries/What-Is-the-Average-Machine-Learning-Engineer-Salary-by-State)"

# Open a Chrome session through chromote and override its user agent with a
# desktop Chrome string before requesting the page.
b <- ChromoteSession$new()
b$Network$setUserAgentOverride(userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")

## named list()

# Navigation and the load-event wait are kept in one braced expression so they
# are evaluated together as a single top-level expression.
{
  b$Page$navigate(uri)
  b$Page$loadEventFired()
}

## $timestamp
## [1] 577657.1

# Pull the fully rendered document out of the browser as one HTML string.
resp <- b$Runtime$evaluate("document.querySelector('html').outerHTML")$result$value

# The HTML is now held in `resp`; close the session so it does not stay open
# for the rest of the run.
invisible(b$close())

salary_node <- resp %>%
  read_html() %>%
  html_element(".salary_by_state_table")

# When the table is absent html_text() gives NA, and `if (NA)` is an error.
# isTRUE() turns that case into FALSE so the empty fallback is actually used.
df_ml_engineer <- if (isTRUE(nchar(html_text(salary_node)) > 100)) {
  html_table(salary_node)
} else {
  tibble(State = character(), `Annual Salary` = numeric())
}

df_ml_engineer2 <- df_ml_engineer %>%
  select(State, `Annual Salary`) %>%
  mutate(
    across(
      .cols = c("Annual Salary"),
      # parse_number() only accepts character vectors; as.character() keeps the
      # numeric fallback column (and any pre-converted column) from failing.
      .fns = function(x) parse_number(as.character(x))
    )
  ) %>%
  rename(ML_Engineer_Annual_Salary = `Annual Salary`)

Business Analyst

# Scrape ZipRecruiter's salary-by-state table for the Business Analyst role and
# reduce it to State + numeric Business_Analyst_Annual_Salary.
uri <- r"(https://www.ziprecruiter.com/Salaries/What-Is-the-Average-Business-Analyst-Salary-by-State)"

# Open a Chrome session through chromote and override its user agent with a
# desktop Chrome string before requesting the page.
b <- ChromoteSession$new()
b$Network$setUserAgentOverride(userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")

## named list()

# Navigation and the load-event wait are kept in one braced expression so they
# are evaluated together as a single top-level expression.
{
  b$Page$navigate(uri)
  b$Page$loadEventFired()
}

## $timestamp
## [1] 577657.7

# Pull the fully rendered document out of the browser as one HTML string.
resp <- b$Runtime$evaluate("document.querySelector('html').outerHTML")$result$value

# The HTML is now held in `resp`; close the session so it does not stay open
# for the rest of the run.
invisible(b$close())

salary_node <- resp %>%
  read_html() %>%
  html_element(".salary_by_state_table")

# When the table is absent html_text() gives NA, and `if (NA)` is an error.
# isTRUE() turns that case into FALSE so the empty fallback is actually used.
df_business_analyst <- if (isTRUE(nchar(html_text(salary_node)) > 100)) {
  html_table(salary_node)
} else {
  tibble(State = character(), `Annual Salary` = numeric())
}

df_business_analyst2 <- df_business_analyst %>%
  select(State, `Annual Salary`) %>%
  mutate(
    across(
      .cols = c("Annual Salary"),
      # parse_number() only accepts character vectors; as.character() keeps the
      # numeric fallback column (and any pre-converted column) from failing.
      .fns = function(x) parse_number(as.character(x))
    )
  ) %>%
  rename(Business_Analyst_Annual_Salary = `Annual Salary`)

Lead Data Architect

# Scrape ZipRecruiter's salary-by-state table for the Lead Data Architect role
# and reduce it to State + numeric Data_Architect_Annual_Salary.
uri <- r"(https://www.ziprecruiter.com/Salaries/What-Is-the-Average-Lead-DATA-Architect-Salary-by-State)"

# Open a Chrome session through chromote and override its user agent with a
# desktop Chrome string before requesting the page.
b <- ChromoteSession$new()
b$Network$setUserAgentOverride(userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")

## named list()

# Navigation and the load-event wait are kept in one braced expression so they
# are evaluated together as a single top-level expression.
{
  b$Page$navigate(uri)
  b$Page$loadEventFired()
}

## $timestamp
## [1] 577658.2

# Pull the fully rendered document out of the browser as one HTML string.
resp <- b$Runtime$evaluate("document.querySelector('html').outerHTML")$result$value

# The HTML is now held in `resp`; close the session so it does not stay open
# for the rest of the run.
invisible(b$close())

salary_node <- resp %>%
  read_html() %>%
  html_element(".salary_by_state_table")

# When the table is absent html_text() gives NA, and `if (NA)` is an error.
# isTRUE() turns that case into FALSE so the empty fallback is actually used.
df_data_architect <- if (isTRUE(nchar(html_text(salary_node)) > 100)) {
  html_table(salary_node)
} else {
  tibble(State = character(), `Annual Salary` = numeric())
}

df_data_architect2 <- df_data_architect %>%
  select(State, `Annual Salary`) %>%
  mutate(
    across(
      .cols = c("Annual Salary"),
      # parse_number() only accepts character vectors; as.character() keeps the
      # numeric fallback column (and any pre-converted column) from failing.
      .fns = function(x) parse_number(as.character(x))
    )
  ) %>%
  rename(Data_Architect_Annual_Salary = `Annual Salary`)

Data Manager

# Scrape ZipRecruiter's salary-by-state table for the Data Manager role and
# reduce it to State + numeric Data_Manager_Annual_Salary.
uri <- r"(https://www.ziprecruiter.com/Salaries/What-Is-the-Average-Data-Manager-Salary-by-State)"

# Open a Chrome session through chromote and override its user agent with a
# desktop Chrome string before requesting the page.
b <- ChromoteSession$new()
b$Network$setUserAgentOverride(userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")

## named list()

# Navigation and the load-event wait are kept in one braced expression so they
# are evaluated together as a single top-level expression.
{
  b$Page$navigate(uri)
  b$Page$loadEventFired()
}

## $timestamp
## [1] 577658.9

# Pull the fully rendered document out of the browser as one HTML string.
resp <- b$Runtime$evaluate("document.querySelector('html').outerHTML")$result$value

# The HTML is now held in `resp`; close the session so it does not stay open
# for the rest of the run.
invisible(b$close())

salary_node <- resp %>%
  read_html() %>%
  html_element(".salary_by_state_table")

# When the table is absent html_text() gives NA, and `if (NA)` is an error.
# isTRUE() turns that case into FALSE so the empty fallback is actually used.
df_data_manager <- if (isTRUE(nchar(html_text(salary_node)) > 100)) {
  html_table(salary_node)
} else {
  tibble(State = character(), `Annual Salary` = numeric())
}

df_data_manager2 <- df_data_manager %>%
  select(State, `Annual Salary`) %>%
  mutate(
    across(
      .cols = c("Annual Salary"),
      # parse_number() only accepts character vectors; as.character() keeps the
      # numeric fallback column (and any pre-converted column) from failing.
      .fns = function(x) parse_number(as.character(x))
    )
  ) %>%
  rename(Data_Manager_Annual_Salary = `Annual Salary`)

Data Visualization Engineer

# Scrape ZipRecruiter's salary-by-state table for the Data Visualization
# Engineer role and reduce it to State + numeric
# Data_Visualization_Annual_Salary.
uri <- r"(https://www.ziprecruiter.com/Salaries/What-Is-the-Average-Data-Visualization-Engineer-Salary-by-State)"

# Open a Chrome session through chromote and override its user agent with a
# desktop Chrome string before requesting the page.
b <- ChromoteSession$new()
b$Network$setUserAgentOverride(userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")

## named list()

# Navigation and the load-event wait are kept in one braced expression so they
# are evaluated together as a single top-level expression.
{
  b$Page$navigate(uri)
  b$Page$loadEventFired()
}

## $timestamp
## [1] 577659.4

# Pull the fully rendered document out of the browser as one HTML string.
resp <- b$Runtime$evaluate("document.querySelector('html').outerHTML")$result$value

# The HTML is now held in `resp`; close the session so it does not stay open
# for the rest of the run.
invisible(b$close())

salary_node <- resp %>%
  read_html() %>%
  html_element(".salary_by_state_table")

# When the table is absent html_text() gives NA, and `if (NA)` is an error.
# isTRUE() turns that case into FALSE so the empty fallback is actually used.
df_data_visualization <- if (isTRUE(nchar(html_text(salary_node)) > 100)) {
  html_table(salary_node)
} else {
  tibble(State = character(), `Annual Salary` = numeric())
}

df_data_visualization2 <- df_data_visualization %>%
  select(State, `Annual Salary`) %>%
  mutate(
    across(
      .cols = c("Annual Salary"),
      # parse_number() only accepts character vectors; as.character() keeps the
      # numeric fallback column (and any pre-converted column) from failing.
      .fns = function(x) parse_number(as.character(x))
    )
  ) %>%
  rename(Data_Visualization_Annual_Salary = `Annual Salary`)

Data Engineer

# Scrape ZipRecruiter's salary-by-state table for the Data Engineer role and
# reduce it to State + numeric Data_Engineer_Annual_Salary.
uri <- r"(https://www.ziprecruiter.com/Salaries/What-Is-the-Average-DATA-Engineer-Salary-by-State)"

# Open a Chrome session through chromote and override its user agent with a
# desktop Chrome string before requesting the page.
b <- ChromoteSession$new()
b$Network$setUserAgentOverride(userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")

## named list()

# Navigation and the load-event wait are kept in one braced expression so they
# are evaluated together as a single top-level expression.
{
  b$Page$navigate(uri)
  b$Page$loadEventFired()
}

## $timestamp
## [1] 577660

# Pull the fully rendered document out of the browser as one HTML string.
resp <- b$Runtime$evaluate("document.querySelector('html').outerHTML")$result$value

# The HTML is now held in `resp`; close the session so it does not stay open
# for the rest of the run.
invisible(b$close())

salary_node <- resp %>%
  read_html() %>%
  html_element(".salary_by_state_table")

# When the table is absent html_text() gives NA, and `if (NA)` is an error.
# isTRUE() turns that case into FALSE so the empty fallback is actually used.
df_data_engineer <- if (isTRUE(nchar(html_text(salary_node)) > 100)) {
  html_table(salary_node)
} else {
  tibble(State = character(), `Annual Salary` = numeric())
}

df_data_engineer2 <- df_data_engineer %>%
  select(State, `Annual Salary`) %>%
  mutate(
    across(
      .cols = c("Annual Salary"),
      # parse_number() only accepts character vectors; as.character() keeps the
      # numeric fallback column (and any pre-converted column) from failing.
      .fns = function(x) parse_number(as.character(x))
    )
  ) %>%
  rename(Data_Engineer_Annual_Salary = `Annual Salary`)

Combining All Datasets:

# Fold the per-role tables into one wide table keyed by State. The Data
# Scientist table comes first, so its rows determine which states are kept by
# each successive left join.
all_data <- Reduce(
  function(joined, role_table) left_join(joined, role_table, by = "State"),
  list(
    df_main,
    df_data_analyst2,
    df_big_data_engineer2,
    df_ml_engineer2,
    df_business_analyst2,
    df_data_architect2,
    df_data_manager2,
    df_data_visualization2,
    df_data_engineer2
  )
)
all_data

## # A tibble: 50 × 10
##    State         DS_Annual_Salary Data_Analyst_Annual_S…¹ Big_Data_Engineer_An…²
##    <chr>                    <dbl>                   <dbl>                  <dbl>
##  1 New York                145027                   98238                 136712
##  2 California              143099                   75874                 132134
##  3 Vermont                 130783                   72641                 122857
##  4 Maine                   129931                   69468                 120340
##  5 Massachusetts           128900                   83473                 125443
##  6 Nevada                  128653                   83624                 124884
##  7 New Jersey              127259                   84878                 121320
##  8 Wisconsin               126987                   84340                 121427
##  9 Washington              126680                   81244                 143356
## 10 Oregon                  125467                   81828                 121511
## # ℹ 40 more rows
## # ℹ abbreviated names: ¹Data_Analyst_Annual_Salary,
## #   ²Big_Data_Engineer_Annual_Salary
## # ℹ 6 more variables: ML_Engineer_Annual_Salary <dbl>,
## #   Business_Analyst_Annual_Salary <dbl>, Data_Architect_Annual_Salary <dbl>,
## #   Data_Manager_Annual_Salary <dbl>, Data_Visualization_Annual_Salary <dbl>,
## #   Data_Engineer_Annual_Salary <dbl>

Obtaining Average Salaries for Each Role

# Average every numeric (salary) column across states, ignoring missing values.
is_salary_col <- vapply(all_data, is.numeric, logical(1))
mean_of_each_role <- colMeans(all_data[is_salary_col], na.rm = TRUE)

# Two-column data frame: the salary column name as Job_Titles next to its mean.
new_mean_df2 <- data.frame(
  Job_Titles = names(mean_of_each_role),
  mean_of_each_role = unname(mean_of_each_role),
  stringsAsFactors = FALSE
)

new_mean_df2

##                         Job_Titles mean_of_each_role
## 1                 DS_Annual_Salary         116193.76
## 2       Data_Analyst_Annual_Salary          75121.02
## 3  Big_Data_Engineer_Annual_Salary         114454.82
## 4        ML_Engineer_Annual_Salary         116884.48
## 5   Business_Analyst_Annual_Salary          86956.96
## 6     Data_Architect_Annual_Salary         126962.74
## 7       Data_Manager_Annual_Salary          94585.12
## 8 Data_Visualization_Annual_Salary         116432.52
## 9      Data_Engineer_Annual_Salary         119563.08

Visualization

By Role

# Fonts: t1 styles the overall layout text, t2 the headline annotation.
t1 <- list(family = "Arial", size = 14, color = "black")
t2 <- list(family = "sans serif", size = 16, color = "blue")

# One line per job role. No x variable is mapped, so the x axis is the position
# of each row in all_data (hence the "State Count" axis title).
# mode must be "lines" ("line" is not a valid scatter mode), and every line
# colour must be a real colour; ColorBrewer palette names such as "PuRd" are
# not colours.
fig <- plot_ly(all_data, y = ~DS_Annual_Salary, name = "Data Scientist",
               type = "scatter", mode = "lines",
               line = list(color = "purple", width = 4))
fig <- fig %>%
  add_trace(y = ~Data_Analyst_Annual_Salary, name = "Data Analyst",
            line = list(color = "red", width = 4))
fig <- fig %>%
  add_trace(y = ~Big_Data_Engineer_Annual_Salary, name = "Big Data Engineer",
            line = list(color = "green", width = 4))
fig <- fig %>%
  add_trace(y = ~ML_Engineer_Annual_Salary, name = "ML Engineer",
            line = list(color = "black", width = 4))
fig <- fig %>%
  add_trace(y = ~Business_Analyst_Annual_Salary, name = "Business Analyst",
            line = list(color = "yellow", width = 4))
fig <- fig %>%
  add_trace(y = ~Data_Architect_Annual_Salary, name = "Data Architect",
            line = list(color = "blue", width = 4))
fig <- fig %>%
  add_trace(y = ~Data_Manager_Annual_Salary, name = "Data Manager",
            line = list(color = "orange", width = 4))
fig <- fig %>%
  add_trace(y = ~Data_Visualization_Annual_Salary, name = "Data Visualization",
            line = list(color = "brown", width = 4))
fig <- fig %>%
  add_trace(y = ~Data_Engineer_Annual_Salary, name = "Data Engineer",
            line = list(color = "teal", width = 4))

# Titles, zero-based axes, and a headline annotation positioned in paper
# coordinates (fractions of the plotting area).
fig <- fig %>%
  layout(
    title = "Variations of Annual Salaries for Each Job Title", font = t1,
    xaxis = list(title = "State Count", rangemode = "tozero"),
    yaxis = list(title = "Annual Salary", rangemode = "tozero"),
    annotations = list(
      text = "Data Architect has the Highest Average, Data Analyst the Lowest",
      font = t2,
      showarrow = FALSE,
      xref = "paper", x = 0.6,
      yref = "paper", y = 1
    )
  )

fig

# Fonts: t1 styles the overall layout text, t2 the headline annotation.
t1 <- list(family = "Arial", size = 14, color = "black")
t2 <- list(family = "sans serif", size = 20, color = "blue")

# Bar chart of the mean salary per role. The bar labels use the formula
# ~mean_of_each_role so they are read from the same data frame column as the
# bar heights, rather than from a separate global vector of the same name.
fig <- plot_ly(new_mean_df2, x = ~Job_Titles, y = ~mean_of_each_role,
               type = "bar",
               text = ~mean_of_each_role, textposition = "auto",
               marker = list(color = "rgb(158,202,225)",
                             line = list(color = "rgb(8,48,107)", width = 1.5)))

# Bars are ordered from highest to lowest total via categoryorder.
fig <- fig %>%
  layout(
    title = "Average Annual Salary for Each Role in the U.S.", font = t1,
    xaxis = list(categoryorder = "total descending", title = "Job Titles"),
    yaxis = list(title = "Annual Salary"),
    annotations = list(
      text = "Data Architect has the Highest Average, Data Analyst the Lowest",
      font = t2,
      showarrow = FALSE,
      xref = "paper", x = 0.6,
      yref = "paper", y = 1
    )
  )

fig

Variation of Salaries by State

# State outline data (long/lat polygon vertices) from map_data().
all_states <- map_data("state")

# The outline data is keyed by a `region` column, so add a lowercased copy of
# State to all_data under that name to give both tables a common key.
all_data[["region"]] <- tolower(all_data[["State"]])
totaldf <- merge(all_states, all_data, by = "region")

# Note: joining usmapdata centroid labels by fips was tried here and is
# currently not used.

# Convert to a data.table keyed (sorted) by region and then vertex order; this
# sorting is what fixed the "cut up" map.
totaldt <- as.data.table(totaldf)
setkeyv(totaldt, c("region", "order"))

# Choropleth of Data Scientist salary by state (YlGnBu gradient, no legend
# title). State-abbreviation labels via geom_text() are not drawn.
totaldt %>%
  ggplot(aes(long, lat, group = group, fill = DS_Annual_Salary)) +
  geom_polygon() +
  coord_map() +
  scale_fill_gradientn(name = "", colours = brewer.pal(9, "YlGnBu")) +
  labs(
    title = "Data Scientist Annual Salary",
    subtitle = "New York + California Contain the Highest Salary Earners; South and Midwest Lag Behind"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(color = "red", size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(color = "blue", size = 12, face = "bold", hjust = -0.2)
  )

# Choropleth of Data Analyst salary by state (YlGnBu gradient, no legend
# title). State-abbreviation labels via geom_text() are not drawn.
totaldt %>%
  ggplot(aes(long, lat, group = group, fill = Data_Analyst_Annual_Salary)) +
  geom_polygon() +
  coord_map() +
  scale_fill_gradientn(name = "", colours = brewer.pal(9, "YlGnBu")) +
  labs(
    title = "Data Analyst Annual Salary",
    subtitle = "New York has the Highest Salary Earners; South and Midwest Lag Behind"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(color = "red", size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(color = "blue", size = 12, face = "bold.italic", hjust = 0.5)
  )

# Choropleth of Big Data Engineer salary by state (YlGnBu gradient, no legend
# title). State-abbreviation labels via geom_text() are not drawn.
totaldt %>%
  ggplot(aes(long, lat, group = group, fill = Big_Data_Engineer_Annual_Salary)) +
  geom_polygon() +
  coord_map() +
  scale_fill_gradientn(name = "", colours = brewer.pal(9, "YlGnBu")) +
  labs(
    title = "Big Data Engineer Annual Salary",
    subtitle = "Washington State has the Highest Salary Earners; Southeast States Lag Behind"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(color = "red", size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(color = "blue", size = 12, face = "bold.italic", hjust = 0.5)
  )

# Choropleth of Machine Learning Engineer salary by state (YlGnBu gradient, no
# legend title). State-abbreviation labels via geom_text() are not drawn.
# The subtitle typo "Comtains" is corrected to "Contains".
ggplot(data = totaldt,
       aes(x = long, y = lat, group = group, fill = ML_Engineer_Annual_Salary)) +
  geom_polygon() +
  coord_map() +
  scale_fill_gradientn("", colours = brewer.pal(9, "YlGnBu")) +
  ggtitle("Machine Learning Engineer Annual Salary",
          subtitle = "New York Contains the Highest Salary Earners; South and Midwest Lag Behind") +
  theme_void() +
  theme(plot.subtitle = element_text(color = "blue", size = 12, face = "bold.italic", hjust = 0.5),
        plot.title = element_text(color = "red", size = 12, face = "bold", hjust = 0.5))

# Choropleth of Business Analyst salary by state (YlGnBu gradient, no legend
# title). State-abbreviation labels via geom_text() are not drawn.
totaldt %>%
  ggplot(aes(long, lat, group = group, fill = Business_Analyst_Annual_Salary)) +
  geom_polygon() +
  coord_map() +
  scale_fill_gradientn(name = "", colours = brewer.pal(9, "YlGnBu")) +
  labs(
    title = "Business Analyst Annual Salary",
    subtitle = "Washington State and NY Contain the Highest Salary Earners; Southeast States Lag Behind"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(color = "red", size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(color = "blue", size = 11, face = "bold.italic", hjust = -0.5)
  )

# Choropleth of Data Architect salary by state (YlGnBu gradient, no legend
# title). State-abbreviation labels via geom_text() are not drawn.
totaldt %>%
  ggplot(aes(long, lat, group = group, fill = Data_Architect_Annual_Salary)) +
  geom_polygon() +
  coord_map() +
  scale_fill_gradientn(name = "", colours = brewer.pal(9, "YlGnBu")) +
  labs(
    title = "Data Architect Annual Salary",
    subtitle = "Nevada and Massachusetts Contain the Highest Salary Earners; Most States Lag Behind"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(color = "red", size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(color = "blue", size = 12, face = "bold.italic", hjust = -0.3)
  )

# Choropleth of Data Manager salary by state (YlGnBu gradient, no legend
# title). State-abbreviation labels via geom_text() are not drawn.
totaldt %>%
  ggplot(aes(long, lat, group = group, fill = Data_Manager_Annual_Salary)) +
  geom_polygon() +
  coord_map() +
  scale_fill_gradientn(name = "", colours = brewer.pal(9, "YlGnBu")) +
  labs(
    title = "Data Manager Annual Salary",
    subtitle = "New York Contains the Highest Salary Earners; South and Midwest Lag Behind"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(color = "red", size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(color = "blue", size = 12, face = "bold.italic", hjust = 0.5)
  )

# Choropleth of Data Visualization Engineer salary by state (YlGnBu gradient,
# no legend title). State-abbreviation labels via geom_text() are not drawn.
totaldt %>%
  ggplot(aes(long, lat, group = group, fill = Data_Visualization_Annual_Salary)) +
  geom_polygon() +
  coord_map() +
  scale_fill_gradientn(name = "", colours = brewer.pal(9, "YlGnBu")) +
  labs(
    title = "Data Visualization Engineer Annual Salary",
    subtitle = "Washington State and New York Contain the Highest Salary Earners; South and Midwest Lag Behind"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(color = "red", size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(color = "blue", size = 10, face = "bold.italic", hjust = -0.3)
  )

# Choropleth of Data Engineer salary by state (YlGnBu gradient, no legend
# title). State-abbreviation labels via geom_text() are not drawn.
totaldt %>%
  ggplot(aes(long, lat, group = group, fill = Data_Engineer_Annual_Salary)) +
  geom_polygon() +
  coord_map() +
  scale_fill_gradientn(name = "", colours = brewer.pal(9, "YlGnBu")) +
  labs(
    title = "Data Engineer Annual Salary",
    subtitle = "Nevada, Oregon and Massachusetts Contain the Highest Salary Earners"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(color = "red", size = 12, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(color = "blue", size = 12, face = "bold.italic", hjust = 0.5)
  )

# I tried to input state abbreviations for each state on the map, but did not have time

# Get centroids
#centroid_labels <- usmapdata::centroid_labels("states")

# Join centroids to data
#state_labels <- merge(all_data, centroid_labels, by = "full")

#plot_usmap(data = statecounts, regions = "state", values = "n") +
#  geom_text(data = state_labels, aes(
    #x = x, y = y,
   # label = stateabbr,
  #), color = "white") +
  #labs(title = "Frequency of Unique Users in the United States",
       #caption = "",
       #fill = "Data_Scientist_Annual_Salary")

Findings

Data Architect roles earned the highest average salary; Data Analyst roles earned the lowest.
Data Engineer earned more on average than Big Data Engineer.
Southern and Midwestern states had lower salaries for most Data Science roles.
High salary earners mostly came from states in the Northeast and West.

Next Steps

Compare salaries by job sector.
Analyze other Data Science-related roles.
Include other sources of data (Bureau of Labor Statistics, LinkedIn, Indeed).