Salary Data Science

Project Objective

To identify the factors that affect data science salaries and to quantify their effect.

Step 0: Install and load required libraries

# Load necessary libraries
library(readxl)    # For reading Excel files
library(dplyr)     # For data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)   # For visualization
library(scales)    # For formatting axes
library(tidyr)     # For reshaping data

Step 1: Import and Clean Data

# Read the salary workbook. file.choose() opens an interactive file picker;
# replace it with a literal path for non-interactive, reproducible runs.
data <- read_excel(file.choose())

# Fail early, with a readable message, if a column used below is missing.
required_columns <- c("salary", "job_title", "experience_level",
                      "company_location", "company_size")
missing_columns <- setdiff(required_columns, names(data))
if (length(missing_columns) > 0) {
  stop("Input file is missing required column(s): ",
       paste(missing_columns, collapse = ", "),
       call. = FALSE)
}

# Standardize column names (only the columns whose names actually change)
# and expand the experience-level codes into descriptive labels.
# recode() leaves any value that is not listed here unchanged.
data <- data %>%
  rename(salary_usd = salary,
         location = company_location) %>%
  mutate(experience_level = recode(experience_level,
                                   "EN" = "Entry-Level",
                                   "MI" = "Mid-Level",
                                   "SE" = "Senior-Level",
                                   "EX" = "Executive"))
Interpretation: The dataset has been cleaned and standardized. Variables like `experience_level` now have descriptive labels for better readability.

Step 2: Descriptive Statistics

# Print per-column summary statistics for the cleaned data: quantiles and mean
# for numeric columns, length/class/mode for character columns.
summary(data)
##    work_year    experience_level   employment_type     job_title        
##  Min.   :2020   Length:3755        Length:3755        Length:3755       
##  1st Qu.:2022   Class :character   Class :character   Class :character  
##  Median :2022   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2022                                                           
##  3rd Qu.:2023                                                           
##  Max.   :2023                                                           
##    salary_usd       location         company_size      
##  Min.   :  5132   Length:3755        Length:3755       
##  1st Qu.: 95000   Class :character   Class :character  
##  Median :135000   Mode  :character   Mode  :character  
##  Mean   :137570                                        
##  3rd Qu.:175000                                        
##  Max.   :450000
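Interpretation:
- The dataset contains 3,755 records covering 2020 to 2023; salaries range from $5,132 to $450,000, with a median of $135,000 and a mean of $137,570.
- Senior-Level and Executive positions tend to have higher median salaries (the summary above does not break salaries down by group; see the experience-level analysis in Step 4).
- Variations are also observed across job titles, locations, and company sizes.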

Step 3: Visualization 1 - Salary by Job Title

# Select the 10 most frequent job titles.
# count(sort = TRUE) orders titles by descending frequency, so head(10) keeps
# exactly ten of them. top_n(10, n) would also keep every title tied with the
# tenth one and could therefore return more than ten titles.
top_titles <- data %>%
  count(job_title, sort = TRUE) %>%
  head(10) %>%
  pull(job_title)

# Keep only the top titles and, within each title, drop salaries outside the
# Tukey fences [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].
data_top_titles <- data %>%
  filter(job_title %in% top_titles) %>%
  group_by(job_title) %>%
  filter(between(salary_usd, quantile(salary_usd, 0.25) - 1.5 * IQR(salary_usd),
                 quantile(salary_usd, 0.75) + 1.5 * IQR(salary_usd))) %>%
  ungroup()

# Order job titles by median salary (ascending) so the boxes are sorted
# left to right on the x axis.
job_title_order <- data_top_titles %>%
  group_by(job_title) %>%
  summarize(median_salary = median(salary_usd, na.rm = TRUE)) %>%
  arrange(median_salary) %>%
  pull(job_title)

data_top_titles$job_title <- factor(data_top_titles$job_title, levels = job_title_order)

# Plot one box per job title. Outlier points are hidden (outlier.shape = NA)
# and the y axis is labelled in thousands of dollars.
# The title describes what a boxplot shows (distribution and median), not an
# average.
ggplot(data_top_titles, aes(x = job_title, y = salary_usd)) +
  geom_boxplot(fill = "skyblue", outlier.shape = NA) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "k", scale = 1e-3),
                     breaks = seq(0, 400000, by = 50000)) +
  labs(title = "Salary Distribution by Top Job Titles (Ascending Median)",
       x = "Job Title",
       y = "Salary (USD)") +
  theme_minimal()

Interpretation: Certain job titles (e.g., "Principal Data Scientist") show consistently higher salaries. Specialization in these roles is associated with greater compensation.

Step 4: Visualization 2 - Salary by Experience Level

# Within each experience level, keep only salaries that lie inside the Tukey
# fences [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] (both bounds inclusive).
data_experience <- data %>%
  group_by(experience_level) %>%
  filter(
    salary_usd >= quantile(salary_usd, 0.25) - 1.5 * IQR(salary_usd),
    salary_usd <= quantile(salary_usd, 0.75) + 1.5 * IQR(salary_usd)
  ) %>%
  ungroup()

# Median salary per experience level, then the levels sorted by that median
# (ascending); order() keeps ties in their original order.
median_by_level <- data_experience %>%
  group_by(experience_level) %>%
  summarize(median_salary = median(salary_usd, na.rm = TRUE))
experience_order <-
  median_by_level$experience_level[order(median_by_level$median_salary)]

# Turn the level column into a factor so the x axis follows the median order.
data_experience <- data_experience %>%
  mutate(experience_level = factor(experience_level, levels = experience_order))

# Boxplot of salary per experience level: outlier points hidden, y axis
# labelled in thousands of dollars.
ggplot(data_experience, aes(experience_level, salary_usd)) +
  geom_boxplot(outlier.shape = NA, fill = "skyblue") +
  scale_y_continuous(
    breaks = seq(0, 400000, by = 50000),
    labels = dollar_format(prefix = "$", suffix = "k", scale = 1e-3)
  ) +
  labs(
    x = "Experience Level",
    y = "Salary (USD)",
    title = "Salary by Experience Level (Ascending Order)"
  ) +
  theme_minimal()

Interpretation: Salaries increase significantly with experience, with Executive and Senior-Level roles earning the most.

Step 5: Correlation Analysis

# Pairwise correlations between the numeric columns of `data`.
# use = "complete.obs" drops every row that has a missing value in any of
# the selected columns before the correlations are computed.
correlation_matrix <- data %>%
  select_if(is.numeric) %>%
  cor(use = "complete.obs")
print(correlation_matrix)
##            work_year salary_usd
## work_year    1.00000    0.22829
## salary_usd   0.22829    1.00000
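Interpretation: The positive correlation between `work_year` and `salary_usd` (0.23) suggests salaries increased slightly over time. Note that `remote_ratio` is not among the numeric columns in the correlation matrix above, so the weak negative correlation between `remote_ratio` and `salary_usd` (-0.06), which would indicate that fully remote roles may have slightly lower pay, cannot be confirmed from this output.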

Step 6: Build the Regression Model

# Convert the categorical predictors to factors so they enter the model as
# categorical terms.
categorical_predictors <- c("experience_level", "job_title",
                            "location", "company_size")
data[categorical_predictors] <- lapply(data[categorical_predictors], as.factor)

# Full linear model: salary explained by experience level, job title,
# company location and company size. The formula is written inline so the
# "Call:" line in the printed summary shows it.
full_model <- lm(
  formula = salary_usd ~ experience_level + job_title + location + company_size,
  data = data
)
summary(full_model)
## 
## Call:
## lm(formula = salary_usd ~ experience_level + job_title + location + 
##     company_size, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -142684  -29181   -4419   23731  375762 
## 
## Coefficients:
##                                                    Estimate Std. Error t value
## (Intercept)                                         28144.6    46694.6   0.603
## experience_levelExecutive                           86199.3     5815.4  14.823
## experience_levelMid-Level                           20178.2     3429.4   5.884
## experience_levelSenior-Level                        46081.8     3232.9  14.254
## job_titleAI Developer                              127572.4    41183.0   3.098
## job_titleAI Programmer                              14385.3    52177.7   0.276
## job_titleAI Scientist                               50420.6    38581.4   1.307
## job_titleAnalytics Engineer                         34034.5    37501.6   0.908
## job_titleApplied Data Scientist                     68004.4    40508.7   1.679
## job_titleApplied Machine Learning Engineer          38736.2    50240.2   0.771
## job_titleApplied Machine Learning Scientist         32321.7    39688.9   0.814
## job_titleApplied Scientist                          70095.2    37795.1   1.855
## job_titleAutonomous Vehicle Technician              47598.8    77472.2   0.614
## job_titleAzure Data Engineer                        43555.0    61860.0   0.704
## job_titleBI Analyst                                 17616.4    40616.8   0.434
## job_titleBI Data Analyst                             4169.6    39292.5   0.106
## job_titleBI Data Engineer                          -19854.1    60332.9  -0.329
## job_titleBI Developer                               11148.4    39471.8   0.282
## job_titleBig Data Architect                         35387.5    50230.8   0.704
## job_titleBig Data Engineer                          35943.8    40416.2   0.889
## job_titleBusiness Data Analyst                      12582.4    38397.0   0.328
## job_titleBusiness Intelligence Engineer             48214.1    44120.4   1.093
## job_titleCloud Data Architect                      125996.5    60381.0   2.087
## job_titleCloud Data Engineer                        76480.9    52497.7   1.457
## job_titleCloud Database Engineer                    29450.6    42845.8   0.687
## job_titleCompliance Data Analyst                   -50691.7    65107.3  -0.779
## job_titleComputer Vision Engineer                   64921.7    38888.3   1.669
## job_titleComputer Vision Software Engineer          26842.9    43325.5   0.620
## job_titleData Analyst                                3884.2    37240.0   0.104
## job_titleData Analytics Consultant                   6639.0    50256.7   0.132
## job_titleData Analytics Engineer                    17663.3    42485.8   0.416
## job_titleData Analytics Lead                       148972.4    52343.6   2.846
## job_titleData Analytics Manager                     25050.0    38554.2   0.650
## job_titleData Analytics Specialist                 -30935.9    50084.1  -0.618
## job_titleData Architect                             38281.2    37510.1   1.021
## job_titleData DevOps Engineer                       28206.3    61108.3   0.462
## job_titleData Engineer                              31148.9    37227.0   0.837
## job_titleData Infrastructure Engineer               66384.9    41922.4   1.584
## job_titleData Lead                                  86564.1    50084.1   1.728
## job_titleData Management Specialist                 21752.6    66363.6   0.328
## job_titleData Manager                                7365.4    38246.4   0.193
## job_titleData Modeler                               -7035.9    50084.1  -0.140
## job_titleData Operations Analyst                   -35373.4    44120.4  -0.802
## job_titleData Operations Engineer                  -11908.7    40095.8  -0.297
## job_titleData Quality Analyst                      -34161.0    41444.0  -0.824
## job_titleData Science Consultant                    11060.0    38470.7   0.287
## job_titleData Science Engineer                       2205.2    43035.4   0.051
## job_titleData Science Lead                          55819.1    40827.8   1.367
## job_titleData Science Manager                       72774.1    37724.7   1.929
## job_titleData Science Tech Lead                    250996.5    60381.0   4.157
## job_titleData Scientist                             36232.7    37229.0   0.973
## job_titleData Scientist Lead                        62135.9    51196.2   1.214
## job_titleData Specialist                             4951.7    39321.7   0.126
## job_titleData Strategist                           -24537.5    50338.0  -0.487
## job_titleDeep Learning Engineer                     32171.8    41944.8   0.767
## job_titleDeep Learning Researcher                   48955.5    60711.7   0.806
## job_titleDirector of Data Science                   73266.7    40178.4   1.824
## job_titleETL Developer                              24152.4    40180.0   0.601
## job_titleETL Engineer                               20073.2    50190.8   0.400
## job_titleFinance Data Analyst                      -13396.5    60518.3  -0.221
## job_titleFinancial Data Analyst                      9648.7    46256.5   0.209
## job_titleHead of Data                               79531.5    40603.5   1.959
## job_titleHead of Data Science                       43696.3    40629.1   1.075
## job_titleHead of Machine Learning                    5216.0    60415.3   0.086
## job_titleInsight Analyst                            -4535.3    50190.8  -0.090
## job_titleLead Data Analyst                           6500.0    42726.1   0.152
## job_titleLead Data Engineer                         44445.3    43046.5   1.032
## job_titleLead Data Scientist                        50986.1    40613.5   1.255
## job_titleLead Machine Learning Engineer             40963.3    46336.3   0.884
## job_titleMachine Learning Developer                 22997.0    42379.8   0.543
## job_titleMachine Learning Engineer                  52418.7    37308.9   1.405
## job_titleMachine Learning Infrastructure Engineer   58984.6    39934.0   1.477
## job_titleMachine Learning Manager                   37209.1    46243.8   0.805
## job_titleMachine Learning Research Engineer         24774.1    44102.8   0.562
## job_titleMachine Learning Researcher                31577.4    42174.6   0.749
## job_titleMachine Learning Scientist                 69897.1    38471.4   1.817
## job_titleMachine Learning Software Engineer         95819.0    40478.6   2.367
## job_titleManager Data Management                      996.5    60381.0   0.017
## job_titleMarketing Data Analyst                    103964.7    51910.1   2.003
## job_titleMarketing Data Engineer                    88519.8    67564.8   1.310
## job_titleML Engineer                                55726.1    38128.4   1.462
## job_titleMLOps Engineer                             28967.7    44106.7   0.657
## job_titleNLP Engineer                               55047.1    41669.5   1.321
## job_titlePower BI Developer                         20515.3    60222.7   0.341
## job_titlePrincipal Data Analyst                     33831.1    50242.2   0.673
## job_titlePrincipal Data Architect                    7178.5    60226.0   0.119
## job_titlePrincipal Data Engineer                    67530.3    50133.6   1.347
## job_titlePrincipal Data Scientist                  104638.2    40935.0   2.556
## job_titlePrincipal Machine Learning Engineer        65996.5    60381.0   1.093
## job_titleProduct Data Analyst                       18114.0    43910.5   0.413
## job_titleProduct Data Scientist                    -16532.0    67549.2  -0.245
## job_titleResearch Engineer                          62643.8    38020.0   1.648
## job_titleResearch Scientist                         66899.8    37574.8   1.780
## job_titleSoftware Data Engineer                     19364.5    53019.9   0.365
## job_titleStaff Data Analyst                       -130655.0    60656.7  -2.154
## job_titleStaff Data Scientist                      -20935.9    60266.3  -0.347
## locationAL                                         -37926.8    66486.9  -0.570
## locationAM                                         -24441.9    55066.1  -0.444
## locationAR                                         -19033.5    39838.0  -0.478
## locationAS                                         -11672.2    41782.8  -0.279
## locationAT                                         -24292.1    34719.2  -0.700
## locationAU                                          11503.9    31621.4   0.364
## locationBA                                          -9417.4    57990.5  -0.162
## locationBE                                          -6561.8    36702.3  -0.179
## locationBO                                         -76789.7    55349.9  -1.387
## locationBR                                         -37559.6    30825.3  -1.218
## locationBS                                         -52299.0    87923.4  -0.595
## locationCA                                          29378.7    28611.0   1.027
## locationCF                                         -31433.9    43816.1  -0.717
## locationCH                                           8116.7    35329.9   0.230
## locationCL                                         -44517.5    55187.8  -0.807
## locationCN                                           4955.6    55482.5   0.089
## locationCO                                         -28586.7    37204.6  -0.768
## locationCR                                          48155.0    66530.5   0.724
## locationCZ                                         -59107.9    40136.7  -1.473
## locationDE                                            223.0    28799.6   0.008
## locationDK                                         -42494.8    38726.4  -1.097
## locationDZ                                          33690.4    55256.1   0.610
## locationEE                                         -58590.9    44416.0  -1.319
## locationEG                                         -97352.2    56034.3  -1.737
## locationES                                         -39036.4    28657.1  -1.362
## locationFI                                         -71986.1    39978.3  -1.801
## locationFR                                         -22479.1    29296.6  -0.767
## locationGB                                           1066.1    28332.6   0.038
## locationGH                                         -42443.9    56584.0  -0.750
## locationGR                                         -17492.4    30977.4  -0.565
## locationHK                                         -19493.5    55187.8  -0.353
## locationHN                                         -20137.2    60045.4  -0.335
## locationHR                                           6426.3    39154.6   0.164
## locationHU                                         -48723.1    43823.7  -1.112
## locationID                                         -24084.1    43837.4  -0.549
## locationIE                                          10568.5    32996.6   0.320
## locationIL                                         165879.1    44277.3   3.746
## locationIN                                         -43250.9    28746.4  -1.505
## locationIQ                                          75157.9    58670.3   1.281
## locationIR                                          17504.3    55208.8   0.317
## locationIT                                         -27686.1    39314.1  -0.704
## locationJP                                          30468.7    34294.0   0.888
## locationKE                                          -3385.2    44969.4  -0.075
## locationLT                                          13407.9    43808.8   0.306
## locationLU                                         -20717.9    39631.4  -0.523
## locationLV                                         -62537.9    36759.6  -1.701
## locationMA                                         -87664.0    57427.7  -1.527
## locationMD                                         -39967.1    57386.3  -0.696
## locationMK                                        -123113.4    57990.5  -2.123
## locationMT                                         -51102.8    55181.0  -0.926
## locationMX                                          -6082.3    31856.3  -0.191
## locationMY                                         -24377.3    55255.5  -0.441
## locationNG                                          52547.1    37230.6   1.411
## locationNL                                         -17781.5    31277.0  -0.569
## locationNZ                                          32627.9    59021.0   0.553
## locationPH                                          -1811.0    55128.1  -0.033
## locationPK                                         -59545.2    37525.2  -1.587
## locationPL                                         -44547.2    35503.3  -1.255
## locationPR                                          49557.4    36754.3   1.348
## locationPT                                         -55690.7    30802.3  -1.808
## locationRO                                         -28550.3    44527.3  -0.641
## locationRU                                         -39988.7    40160.4  -0.996
## locationSE                                           7280.9    45468.8   0.160
## locationSG                                         -49694.5    41326.3  -1.202
## locationSI                                         -41560.2    36986.4  -1.124
## locationSK                                        -111799.7    66209.6  -1.689
## locationTH                                         -51241.0    39999.8  -1.281
## locationTR                                         -70190.6    35250.4  -1.991
## locationUA                                         -79985.9    38208.7  -2.093
## locationUS                                          49777.1    28133.2   1.769
## locationVN                                         -47293.6    55246.9  -0.856
## company_sizeM                                        1932.3     2802.0   0.690
## company_sizeS                                      -26299.6     5177.4  -5.080
##                                                   Pr(>|t|)    
## (Intercept)                                       0.546721    
## experience_levelExecutive                          < 2e-16 ***
## experience_levelMid-Level                         4.37e-09 ***
## experience_levelSenior-Level                       < 2e-16 ***
## job_titleAI Developer                             0.001965 ** 
## job_titleAI Programmer                            0.782797    
## job_titleAI Scientist                             0.191343    
## job_titleAnalytics Engineer                       0.364177    
## job_titleApplied Data Scientist                   0.093286 .  
## job_titleApplied Machine Learning Engineer        0.440746    
## job_titleApplied Machine Learning Scientist       0.415483    
## job_titleApplied Scientist                        0.063734 .  
## job_titleAutonomous Vehicle Technician            0.538991    
## job_titleAzure Data Engineer                      0.481423    
## job_titleBI Analyst                               0.664516    
## job_titleBI Data Analyst                          0.915495    
## job_titleBI Data Engineer                         0.742118    
## job_titleBI Developer                             0.777622    
## job_titleBig Data Architect                       0.481169    
## job_titleBig Data Engineer                        0.373879    
## job_titleBusiness Data Analyst                    0.743163    
## job_titleBusiness Intelligence Engineer           0.274561    
## job_titleCloud Data Architect                     0.036987 *  
## job_titleCloud Data Engineer                      0.145248    
## job_titleCloud Database Engineer                  0.491898    
## job_titleCompliance Data Analyst                  0.436274    
## job_titleComputer Vision Engineer                 0.095117 .  
## job_titleComputer Vision Software Engineer        0.535584    
## job_titleData Analyst                             0.916937    
## job_titleData Analytics Consultant                0.894912    
## job_titleData Analytics Engineer                  0.677622    
## job_titleData Analytics Lead                      0.004452 ** 
## job_titleData Analytics Manager                   0.515905    
## job_titleData Analytics Specialist                0.536827    
## job_titleData Architect                           0.307533    
## job_titleData DevOps Engineer                     0.644412    
## job_titleData Engineer                            0.402800    
## job_titleData Infrastructure Engineer             0.113392    
## job_titleData Lead                                0.084007 .  
## job_titleData Management Specialist               0.743097    
## job_titleData Manager                             0.847302    
## job_titleData Modeler                             0.888288    
## job_titleData Operations Analyst                  0.422753    
## job_titleData Operations Engineer                 0.766479    
## job_titleData Quality Analyst                     0.409842    
## job_titleData Science Consultant                  0.773753    
## job_titleData Science Engineer                    0.959135    
## job_titleData Science Lead                        0.171654    
## job_titleData Science Manager                     0.053800 .  
## job_titleData Science Tech Lead                   3.30e-05 ***
## job_titleData Scientist                           0.330501    
## job_titleData Scientist Lead                      0.224949    
## job_titleData Specialist                          0.899796    
## job_titleData Strategist                          0.625966    
## job_titleDeep Learning Engineer                   0.443130    
## job_titleDeep Learning Researcher                 0.420088    
## job_titleDirector of Data Science                 0.068306 .  
## job_titleETL Developer                            0.547809    
## job_titleETL Engineer                             0.689226    
## job_titleFinance Data Analyst                     0.824822    
## job_titleFinancial Data Analyst                   0.834779    
## job_titleHead of Data                             0.050221 .  
## job_titleHead of Data Science                     0.282226    
## job_titleHead of Machine Learning                 0.931205    
## job_titleInsight Analyst                          0.928006    
## job_titleLead Data Analyst                        0.879092    
## job_titleLead Data Engineer                       0.301910    
## job_titleLead Data Scientist                      0.209416    
## job_titleLead Machine Learning Engineer           0.376732    
## job_titleMachine Learning Developer               0.587410    
## job_titleMachine Learning Engineer                0.160110    
## job_titleMachine Learning Infrastructure Engineer 0.139750    
## job_titleMachine Learning Manager                 0.421087    
## job_titleMachine Learning Research Engineer       0.574331    
## job_titleMachine Learning Researcher              0.454069    
## job_titleMachine Learning Scientist               0.069322 .  
## job_titleMachine Learning Software Engineer       0.017978 *  
## job_titleManager Data Management                  0.986834    
## job_titleMarketing Data Analyst                   0.045276 *  
## job_titleMarketing Data Engineer                  0.190230    
## job_titleML Engineer                              0.143955    
## job_titleMLOps Engineer                           0.511374    
## job_titleNLP Engineer                             0.186572    
## job_titlePower BI Developer                       0.733382    
## job_titlePrincipal Data Analyst                   0.500761    
## job_titlePrincipal Data Architect                 0.905130    
## job_titlePrincipal Data Engineer                  0.178063    
## job_titlePrincipal Data Scientist                 0.010623 *  
## job_titlePrincipal Machine Learning Engineer      0.274467    
## job_titleProduct Data Analyst                     0.679983    
## job_titleProduct Data Scientist                   0.806672    
## job_titleResearch Engineer                        0.099511 .  
## job_titleResearch Scientist                       0.075088 .  
## job_titleSoftware Data Engineer                   0.714961    
## job_titleStaff Data Analyst                       0.031306 *  
## job_titleStaff Data Scientist                     0.728319    
## locationAL                                        0.568415    
## locationAM                                        0.657166    
## locationAR                                        0.632841    
## locationAS                                        0.779989    
## locationAT                                        0.484176    
## locationAU                                        0.716027    
## locationBA                                        0.871004    
## locationBE                                        0.858116    
## locationBO                                        0.165421    
## locationBR                                        0.223128    
## locationBS                                        0.551998    
## locationCA                                        0.304569    
## locationCF                                        0.473171    
## locationCH                                        0.818307    
## locationCL                                        0.419919    
## locationCN                                        0.928834    
## locationCO                                        0.442320    
## locationCR                                        0.469234    
## locationCZ                                        0.140930    
## locationDE                                        0.993822    
## locationDK                                        0.272580    
## locationDZ                                        0.542091    
## locationEE                                        0.187207    
## locationEG                                        0.082408 .  
## locationES                                        0.173224    
## locationFI                                        0.071845 .  
## locationFR                                        0.442958    
## locationGB                                        0.969987    
## locationGH                                        0.453242    
## locationGR                                        0.572325    
## locationHK                                        0.723944    
## locationHN                                        0.737369    
## locationHR                                        0.869641    
## locationHU                                        0.266300    
## locationID                                        0.582767    
## locationIE                                        0.748766    
## locationIL                                        0.000182 ***
## locationIN                                        0.132524    
## locationIQ                                        0.200269    
## locationIR                                        0.751219    
## locationIT                                        0.481337    
## locationJP                                        0.374355    
## locationKE                                        0.939999    
## locationLT                                        0.759580    
## locationLU                                        0.601170    
## locationLV                                        0.088979 .  
## locationMA                                        0.126971    
## locationMD                                        0.486188    
## locationMK                                        0.033823 *  
## locationMT                                        0.354459    
## locationMX                                        0.848592    
## locationMY                                        0.659114    
## locationNG                                        0.158215    
## locationNL                                        0.569721    
## locationNZ                                        0.580422    
## locationPH                                        0.973796    
## locationPK                                        0.112645    
## locationPL                                        0.209657    
## locationPR                                        0.177633    
## locationPT                                        0.070689 .  
## locationRO                                        0.521443    
## locationRU                                        0.319451    
## locationSE                                        0.872788    
## locationSG                                        0.229253    
## locationSI                                        0.261232    
## locationSK                                        0.091388 .  
## locationTH                                        0.200266    
## locationTR                                        0.046535 *  
## locationUA                                        0.036384 *  
## locationUS                                        0.076923 .  
## locationVN                                        0.392033    
## company_sizeM                                     0.490476    
## company_sizeS                                     3.97e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47400 on 3586 degrees of freedom
## Multiple R-squared:  0.4601, Adjusted R-squared:  0.4348 
## F-statistic: 18.19 on 168 and 3586 DF,  p-value: < 2.2e-16

Step 7: Refine the Model

# Reduced linear model: salary explained by experience level and job title
# only (location and company size are left out). The formula is written
# inline so the "Call:" line in the printed summary shows it.
refined_model <- lm(
  formula = salary_usd ~ experience_level + job_title,
  data = data
)
summary(refined_model)
## 
## Call:
## lm(formula = salary_usd ~ experience_level + job_title, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -171618  -33761   -4253   31341  347577 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                          -1532      26827  -0.057
## experience_levelExecutive                           109830       6370  17.243
## experience_levelMid-Level                            23641       3669   6.443
## experience_levelSenior-Level                         67895       3363  20.189
## job_titleAI Developer                               115383      31267   3.690
## job_titleAI Programmer                               56532      46406   1.218
## job_titleAI Scientist                                84670      29939   2.828
## job_titleAnalytics Engineer                          88993      27346   3.254
## job_titleApplied Data Scientist                      85433      31689   2.696
## job_titleApplied Machine Learning Engineer           77766      46426   1.675
## job_titleApplied Machine Learning Scientist          76532      30939   2.474
## job_titleApplied Scientist                          130925      27726   4.722
## job_titleAutonomous Vehicle Technician               15989      46380   0.345
## job_titleAzure Data Engineer                         33637      59901   0.562
## job_titleBI Analyst                                  63448      32194   1.971
## job_titleBI Data Analyst                             39091      30142   1.297
## job_titleBI Data Engineer                            61532      59895   1.027
## job_titleBI Developer                                73928      30650   2.412
## job_titleBig Data Architect                          59439      46414   1.281
## job_titleBig Data Engineer                           43090      31271   1.378
## job_titleBusiness Data Analyst                       50462      30142   1.674
## job_titleBusiness Intelligence Engineer             107787      37912   2.843
## job_titleCloud Data Architect                       183637      59901   3.066
## job_titleCloud Data Engineer                         35689      40933   0.872
## job_titleCloud Database Engineer                     88637      35972   2.464
## job_titleCompliance Data Analyst                     31532      59895   0.526
## job_titleComputer Vision Engineer                    97178      29620   3.281
## job_titleComputer Vision Software Engineer           60919      35924   1.696
## job_titleData Analyst                                61281      26895   2.278
## job_titleData Analytics Consultant                   71211      46380   1.535
## job_titleData Analytics Engineer                     38886      34571   1.125
## job_titleData Analytics Lead                        167018      46406   3.599
## job_titleData Analytics Manager                      87796      29158   3.011
## job_titleData Analytics Specialist                   28637      46414   0.617
## job_titleData Architect                              97587      27360   3.567
## job_titleData DevOps Engineer                       -12709      59901  -0.212
## job_titleData Engineer                               87398      26869   3.253
## job_titleData Infrastructure Engineer               138191      34612   3.993
## job_titleData Lead                                  146137      46414   3.149
## job_titleData Management Specialist                   1930      59901   0.032
## job_titleData Manager                                62642      28600   2.190
## job_titleData Modeler                                52537      46414   1.132
## job_titleData Operations Analyst                     24199      37912   0.638
## job_titleData Operations Engineer                    58674      31727   1.849
## job_titleData Quality Analyst                        28647      33595   0.853
## job_titleData Science Consultant                     47924      28930   1.657
## job_titleData Science Engineer                       35970      35963   1.000
## job_titleData Science Lead                          106566      32834   3.246
## job_titleData Science Manager                       127404      27741   4.593
## job_titleData Science Tech Lead                     308637      59901   5.152
## job_titleData Scientist                              87158      26879   3.243
## job_titleData Scientist Lead                         91917      46406   1.981
## job_titleData Specialist                             70773      30394   2.329
## job_titleData Strategist                             14637      46414   0.315
## job_titleDeep Learning Engineer                      95012      34581   2.748
## job_titleDeep Learning Researcher                    57042      59901   0.952
## job_titleDirector of Data Science                    98279      31567   3.113
## job_titleETL Developer                               73941      31727   2.331
## job_titleETL Engineer                                49285      46426   1.062
## job_titleFinance Data Analyst                        -4467      59901  -0.075
## job_titleFinancial Data Analyst                      87438      40912   2.137
## job_titleHead of Data                                96759      31895   3.034
## job_titleHead of Data Science                        71447      32504   2.198
## job_titleHead of Machine Learning                   -31989      60148  -0.532
## job_titleInsight Analyst                             24677      46426   0.532
## job_titleLead Data Analyst                           41609      35972   1.157
## job_titleLead Data Engineer                          73254      34620   2.116
## job_titleLead Data Scientist                         56247      32225   1.745
## job_titleLead Machine Learning Engineer              16564      40943   0.405
## job_titleMachine Learning Developer                  62974      33572   1.876
## job_titleMachine Learning Engineer                  102296      27000   3.789
## job_titleMachine Learning Infrastructure Engineer    92741      31310   2.962
## job_titleMachine Learning Manager                    89338      40943   2.182
## job_titleMachine Learning Research Engineer          38441      37875   1.015
## job_titleMachine Learning Researcher                 76388      34620   2.206
## job_titleMachine Learning Scientist                 117577      28798   4.083
## job_titleMachine Learning Software Engineer         132846      31719   4.188
## job_titleManager Data Management                     58637      59901   0.979
## job_titleMarketing Data Analyst                      77964      46414   1.680
## job_titleMarketing Data Engineer                     68502      59895   1.144
## job_titleML Engineer                                110301      28344   3.891
## job_titleMLOps Engineer                             106891      37926   2.818
## job_titleNLP Engineer                                85388      33605   2.541
## job_titlePower BI Developer                           6941      59895   0.116
## job_titlePrincipal Data Analyst                      78264      46406   1.686
## job_titlePrincipal Data Architect                   -28209      59901  -0.471
## job_titlePrincipal Data Engineer                    126137      46414   2.718
## job_titlePrincipal Data Scientist                   132098      32845   4.022
## job_titlePrincipal Machine Learning Engineer        123637      59901   2.064
## job_titleProduct Data Analyst                        39116      35952   1.088
## job_titleProduct Data Scientist                     -58363      59901  -0.974
## job_titleResearch Engineer                          121075      28199   4.294
## job_titleResearch Scientist                         114204      27451   4.160
## job_titleSoftware Data Engineer                      18274      46406   0.394
## job_titleStaff Data Analyst                         -93298      60148  -1.551
## job_titleStaff Data Scientist                        38637      59901   0.645
##                                                   Pr(>|t|)    
## (Intercept)                                       0.954471    
## experience_levelExecutive                          < 2e-16 ***
## experience_levelMid-Level                         1.32e-10 ***
## experience_levelSenior-Level                       < 2e-16 ***
## job_titleAI Developer                             0.000227 ***
## job_titleAI Programmer                            0.223228    
## job_titleAI Scientist                             0.004708 ** 
## job_titleAnalytics Engineer                       0.001147 ** 
## job_titleApplied Data Scientist                   0.007049 ** 
## job_titleApplied Machine Learning Engineer        0.094004 .  
## job_titleApplied Machine Learning Scientist       0.013419 *  
## job_titleApplied Scientist                        2.42e-06 ***
## job_titleAutonomous Vehicle Technician            0.730311    
## job_titleAzure Data Engineer                      0.574463    
## job_titleBI Analyst                               0.048822 *  
## job_titleBI Data Analyst                          0.194748    
## job_titleBI Data Engineer                         0.304331    
## job_titleBI Developer                             0.015914 *  
## job_titleBig Data Architect                       0.200404    
## job_titleBig Data Engineer                        0.168293    
## job_titleBusiness Data Analyst                    0.094184 .  
## job_titleBusiness Intelligence Engineer           0.004493 ** 
## job_titleCloud Data Architect                     0.002188 ** 
## job_titleCloud Data Engineer                      0.383335    
## job_titleCloud Database Engineer                  0.013782 *  
## job_titleCompliance Data Analyst                  0.598605    
## job_titleComputer Vision Engineer                 0.001045 ** 
## job_titleComputer Vision Software Engineer        0.090016 .  
## job_titleData Analyst                             0.022754 *  
## job_titleData Analytics Consultant                0.124771    
## job_titleData Analytics Engineer                  0.260745    
## job_titleData Analytics Lead                      0.000324 ***
## job_titleData Analytics Manager                   0.002621 ** 
## job_titleData Analytics Specialist                0.537284    
## job_titleData Architect                           0.000366 ***
## job_titleData DevOps Engineer                     0.831983    
## job_titleData Engineer                            0.001153 ** 
## job_titleData Infrastructure Engineer             6.66e-05 ***
## job_titleData Lead                                0.001654 ** 
## job_titleData Management Specialist               0.974303    
## job_titleData Manager                             0.028567 *  
## job_titleData Modeler                             0.257744    
## job_titleData Operations Analyst                  0.523322    
## job_titleData Operations Engineer                 0.064491 .  
## job_titleData Quality Analyst                     0.393861    
## job_titleData Science Consultant                  0.097703 .  
## job_titleData Science Engineer                    0.317294    
## job_titleData Science Lead                        0.001183 ** 
## job_titleData Science Manager                     4.52e-06 ***
## job_titleData Science Tech Lead                   2.71e-07 ***
## job_titleData Scientist                           0.001195 ** 
## job_titleData Scientist Lead                      0.047699 *  
## job_titleData Specialist                          0.019938 *  
## job_titleData Strategist                          0.752514    
## job_titleDeep Learning Engineer                   0.006034 ** 
## job_titleDeep Learning Researcher                 0.341022    
## job_titleDirector of Data Science                 0.001864 ** 
## job_titleETL Developer                            0.019831 *  
## job_titleETL Engineer                             0.288487    
## job_titleFinance Data Analyst                     0.940554    
## job_titleFinancial Data Analyst                   0.032647 *  
## job_titleHead of Data                             0.002433 ** 
## job_titleHead of Data Science                     0.028005 *  
## job_titleHead of Machine Learning                 0.594870    
## job_titleInsight Analyst                          0.595079    
## job_titleLead Data Analyst                        0.247468    
## job_titleLead Data Engineer                       0.034415 *  
## job_titleLead Data Scientist                      0.080989 .  
## job_titleLead Machine Learning Engineer           0.685815    
## job_titleMachine Learning Developer               0.060768 .  
## job_titleMachine Learning Engineer                0.000154 ***
## job_titleMachine Learning Infrastructure Engineer 0.003076 ** 
## job_titleMachine Learning Manager                 0.029171 *  
## job_titleMachine Learning Research Engineer       0.310207    
## job_titleMachine Learning Researcher              0.027414 *  
## job_titleMachine Learning Scientist               4.54e-05 ***
## job_titleMachine Learning Software Engineer       2.88e-05 ***
## job_titleManager Data Management                  0.327695    
## job_titleMarketing Data Analyst                   0.093092 .  
## job_titleMarketing Data Engineer                  0.252821    
## job_titleML Engineer                              0.000101 ***
## job_titleMLOps Engineer                           0.004853 ** 
## job_titleNLP Engineer                             0.011097 *  
## job_titlePower BI Developer                       0.907752    
## job_titlePrincipal Data Analyst                   0.091786 .  
## job_titlePrincipal Data Architect                 0.637715    
## job_titlePrincipal Data Engineer                  0.006606 ** 
## job_titlePrincipal Data Scientist                 5.89e-05 ***
## job_titlePrincipal Machine Learning Engineer      0.039085 *  
## job_titleProduct Data Analyst                     0.276656    
## job_titleProduct Data Scientist                   0.329954    
## job_titleResearch Engineer                        1.80e-05 ***
## job_titleResearch Scientist                       3.25e-05 ***
## job_titleSoftware Data Engineer                   0.693767    
## job_titleStaff Data Analyst                       0.120954    
## job_titleStaff Data Scientist                     0.518960    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 53550 on 3659 degrees of freedom
## Multiple R-squared:  0.297,  Adjusted R-squared:  0.2788 
## F-statistic: 16.27 on 95 and 3659 DF,  p-value: < 2.2e-16
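Interpretation: The refined model keeps only experience level and job title; location and company size were dropped to simplify it. Experience level and many job titles remain significant, but the simplification has a cost: adjusted R-squared falls from 0.4348 in the full model to 0.2788, so the refined model explains only about 30% of the salary variation (R-squared = 0.297). Location and company size were not uniformly insignificant in the full model (for example, company_sizeS has p = 3.97e-07, and locations such as TR and UA are significant at the 5% level), so the refined model trades explanatory power for simplicity.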

Step 8: Predictions with Confidence and Prediction Intervals

# Profile to score: a single Senior-Level Data Scientist.
new_data <- data.frame(
  experience_level = "Senior-Level",
  job_title = "Data Scientist"
)

# 95% interval for the mean salary of this profile.
confidence_interval <- predict(
  refined_model,
  newdata = new_data,
  interval = "confidence",
  level = 0.95
)

# 95% interval for the salary of one individual with this profile.
prediction_interval <- predict(
  refined_model,
  newdata = new_data,
  interval = "prediction",
  level = 0.95
)

print(confidence_interval)
##        fit      lwr      upr
## 1 153521.6 149745.5 157297.7
print(prediction_interval)
##        fit      lwr      upr
## 1 153521.6 48461.75 258581.4
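Interpretation: Confidence Interval: With 95% confidence, the mean salary of Senior-Level Data Scientists lies between $149,746 and $157,298.
Prediction Interval: The salary of an individual Senior-Level Data Scientist is expected to fall between $48,462 and $258,581 (95% prediction interval); this range is much wider because it reflects person-to-person variation as well as uncertainty in the estimated mean.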