Joe Yang
2025-12-02
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.2.0 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the job-projection table from the working directory.
projection_data <- read.csv(
  file = "Washington_Job_Proj_2022_2032.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Inspect the column names and types
glimpse(projection_data)
## Rows: 10
## Columns: 12
## $ Area <chr> "Washington", "Washington", "Washington", "Washingto…
## $ Title <chr> "Computer and Information Research Scientists", "Com…
## $ Base <int> 2850, 5190, 4710, 7230, 20320, 4030, 4310, 10980, 10…
## $ Projected <int> 3710, 5690, 5590, 8220, 23540, 4770, 5840, 12720, 13…
## $ Change <int> 860, 500, 880, 990, 3220, 740, 1530, 1740, 30940, 63…
## $ PercentChange <dbl> 30.2, 9.6, 18.7, 13.7, 15.8, 18.4, 35.5, 15.8, 29.8,…
## $ AvgAnnualOpenings <int> 380, 380, 510, 660, 1920, 400, 600, 1000, 12910, 2790
## $ STFIPS <int> 53, 53, 53, 53, 53, 53, 53, 53, 53, 53
## $ StateURL <chr> "https://esd.wa.gov/labormarketinfo/projections", "h…
## $ OccCode <chr> "15-1221", "15-1241", "15-1231", "15-1251", "15-1211…
## $ BaseYear <int> 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022…
## $ ProjYear <int> 2032, 2032, 2032, 2032, 2032, 2032, 2032, 2032, 2032…
# Read the occupation summary table from the working directory.
tech_occup <- read.csv(
  file = "Computer and Information Technology Occupations - Sheet1.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Inspect the column names and types
glimpse(tech_occup)
## Rows: 10
## Columns: 9
## $ Occupation <chr> "Computer and Information Rese…
## $ Job.Summary <chr> "Computer and information rese…
## $ Median.Pay..per.year. <chr> "$140,910", "$130,390", "$98,6…
## $ Typical.Entry.Level.Education <chr> "Master’s degree", "Bachelor’s…
## $ Work.Experience.in.a.Related.Occupation <chr> "None", "5 years or more", "No…
## $ On.the.job.Training <chr> "None", "None", "None", "Moder…
## $ Number.of.Jobs..2024 <chr> "40,300", "179,200", "121,200"…
## $ Job.Outlook..2024.34 <chr> "20%", "12%", "-6%", "-3%", "9…
## $ Employment.Change..2024.34 <chr> "7,900", "21,400", "-7,200", "…
# Convert the formatted text columns of tech_occup to numeric values.
# Pay is stripped of "$" and "," before conversion; the two job-count
# columns are stripped of "," only.
tech_occup <- tech_occup %>%
  mutate(
    Median.Pay..per.year. = as.numeric(
      gsub("[\\$,]", "", Median.Pay..per.year.)
    ),
    across(
      c(Number.of.Jobs..2024, Employment.Change..2024.34),
      function(col) as.numeric(gsub(",", "", col))
    )
  )
# Job.Outlook..2024.34 is not converted here and stays as text (e.g. "20%").
# Combine by matching Title and Occupation
# Attach the occupation attributes to every projection row.
# Titles are compared as exact strings; a projection row without a matching
# Occupation is kept, with NA in the joined columns.
# relationship = "many-to-one" makes the join stop with an error if a Title
# ever matches more than one Occupation row, instead of silently duplicating
# projection rows.
combined_data <- projection_data %>%
  left_join(
    tech_occup,
    by = join_by(Title == Occupation),
    relationship = "many-to-one"
  )
## Rows: 10
## Columns: 20
## $ Area <chr> "Washington", "Washington", "W…
## $ Title <chr> "Computer and Information Rese…
## $ Base <int> 2850, 5190, 4710, 7230, 20320,…
## $ Projected <int> 3710, 5690, 5590, 8220, 23540,…
## $ Change <int> 860, 500, 880, 990, 3220, 740,…
## $ PercentChange <dbl> 30.2, 9.6, 18.7, 13.7, 15.8, 1…
## $ AvgAnnualOpenings <int> 380, 380, 510, 660, 1920, 400,…
## $ STFIPS <int> 53, 53, 53, 53, 53, 53, 53, 53…
## $ StateURL <chr> "https://esd.wa.gov/labormarke…
## $ OccCode <chr> "15-1221", "15-1241", "15-1231…
## $ BaseYear <int> 2022, 2022, 2022, 2022, 2022, …
## $ ProjYear <int> 2032, 2032, 2032, 2032, 2032, …
## $ Job.Summary <chr> "Computer and information rese…
## $ Median.Pay..per.year. <dbl> 140910, 130390, 98670, 61550, …
## $ Typical.Entry.Level.Education <chr> "Master’s degree", "Bachelor’s…
## $ Work.Experience.in.a.Related.Occupation <chr> "None", "5 years or more", "No…
## $ On.the.job.Training <chr> "None", "None", "None", "Moder…
## $ Number.of.Jobs..2024 <dbl> 40300, 179200, 121200, 882300,…
## $ Job.Outlook..2024.34 <chr> "20%", "12%", "-6%", "-3%", "9…
## $ Employment.Change..2024.34 <dbl> 7900, 21400, -7200, -24200, 45…
# Keep only the columns used in the analysis and give them shorter names.
# NOTE(review): NumberOfJobs_2024_2034 is taken from Number.of.Jobs..2024,
# which looks like a single-year (2024) count rather than a 2024-2034 figure.
# The name is kept so existing references keep working -- confirm and rename.
combined_data <- combined_data %>%
  select(
    JobTitle = Title,
    Base,
    Projected,
    Change,
    PercentChange,
    AvgAnnualOpenings,
    BaseYear,
    ProjYear,
    MedianPayYearly = Median.Pay..per.year.,
    EntryLevel = Typical.Entry.Level.Education,
    WorkExperience = Work.Experience.in.a.Related.Occupation,
    NumberOfJobs_2024_2034 = Number.of.Jobs..2024,
    EmploymentChange_2024_2034 = Employment.Change..2024.34
  )
## Rows: 10
## Columns: 13
## $ JobTitle <chr> "Computer and Information Research Scientis…
## $ Base <int> 2850, 5190, 4710, 7230, 20320, 4030, 4310, …
## $ Projected <int> 3710, 5690, 5590, 8220, 23540, 4770, 5840, …
## $ Change <int> 860, 500, 880, 990, 3220, 740, 1530, 1740, …
## $ PercentChange <dbl> 30.2, 9.6, 18.7, 13.7, 15.8, 18.4, 35.5, 15…
## $ AvgAnnualOpenings <int> 380, 380, 510, 660, 1920, 400, 600, 1000, 1…
## $ BaseYear <int> 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2…
## $ ProjYear <int> 2032, 2032, 2032, 2032, 2032, 2032, 2032, 2…
## $ MedianPayYearly <dbl> 140910, 130390, 98670, 61550, 103790, 12310…
## $ EntryLevel <chr> "Master’s degree", "Bachelor’s degree", "Ba…
## $ WorkExperience <chr> "None", "5 years or more", "None", "None", …
## $ NumberOfJobs_2024_2034 <dbl> 40300, 179200, 121200, 882300, 521100, 1449…
## $ EmploymentChange_2024_2034 <dbl> 7900, 21400, -7200, -24200, 45500, 5300, 52…
What is the projected trend for computer-related professions over the next 10 years?
In terms of both projected job growth and pay, which computer-related occupations are projected to rise in the following years?
Rank computer-related professions from highest to lowest in terms of percent change.
# Horizontal bar chart of projected percent change, one bar per job title,
# with bars ordered by PercentChange.
ggplot(
  combined_data,
  aes(x = reorder(JobTitle, PercentChange), y = PercentChange, fill = JobTitle)
) +
  geom_col() +
  coord_flip() + # swap the axes so the long job titles fit
  labs(
    title = "Top Computer Occupations by Job Growth (2022–2032)",
    x = "Job Title",
    y = "PercentChange (%)"
  ) +
  theme(legend.position = "none") # the axis already names each bar

The graph shows the projected job growth for computer-related professions between 2022 and 2032.
The values on the graph show that the highest-growth occupations are projected to grow by roughly 30–35%, while the lowest-growth occupations are at roughly 10%.
In the previous question, we already found the ranking of computer-related professions in terms of job growth.
Now we find the ranking of computer-related professions in terms of median pay.
# Horizontal bar chart of median yearly pay, one bar per job title,
# with bars ordered by MedianPayYearly.
# NOTE(review): the title says 2022, but the other columns of the source
# table that supplies the pay figures are labelled 2024 / 2024-34 -- confirm
# which year the pay figures refer to.
ggplot(
  combined_data,
  aes(
    x = reorder(JobTitle, MedianPayYearly),
    y = MedianPayYearly,
    fill = JobTitle
  )
) +
  geom_col() +
  coord_flip() + # swap the axes so the long job titles fit
  scale_y_continuous(labels = scales::comma) + # thousands separators
  labs(
    title = "Top Computer Occupations by Median Pay (2022)",
    x = "Job Title",
    y = "Median Pay Yearly ($)"
  ) +
  theme(legend.position = "none") # the axis already names each bar

We see that the ranking of median pay for the jobs differs from the job growth.
To find if there is any correlation, we create a graph to compare median pay and job growth.
# Scatter plot of median pay against projected percent change, one point per
# job title, coloured by job title. It is drawn twice: first with the colour
# legend, then again with the legend hidden.
pay_vs_growth <- ggplot(
  combined_data,
  aes(x = PercentChange, y = MedianPayYearly, color = JobTitle)
) +
  geom_point() +
  labs(
    title = "Median Pay vs Percent Change (2022–2032)",
    x = "PercentChange (%)",
    y = "Median Pay Yearly ($)"
  )

# Version with the legend
pay_vs_growth

# Version without the legend
pay_vs_growth +
  theme(legend.position = "none")

There doesn’t seem to be a correlation between the two values.
As such, we can simply filter according to the extremes of both values.
Using the previous graph as a reference, we keep the occupations whose median pay is above $120,000 or whose projected growth is above 25%.
# Keep the occupations that clear either threshold (growth above 25 or median
# pay above 120000), then chart their percent change as ordered horizontal
# bars.
top_choices <- combined_data %>%
  filter(PercentChange > 25 | MedianPayYearly > 120000)

ggplot(
  top_choices,
  aes(x = reorder(JobTitle, PercentChange), y = PercentChange, fill = JobTitle)
) +
  geom_col() +
  coord_flip() + # swap the axes so the long job titles fit
  labs(
    title = "Top Computer Occupations by Median Pay or Growth (2022–2032)",
    x = "Job Title",
    y = "PercentChange (%)"
  ) +
  theme(legend.position = "none") # the axis already names each bar

This shows the top choices for computer-related professions for both pay and job growth.
The graphs differ slightly: computer network architects, at the bottom in terms of job growth, rank among the highest in terms of median pay.
Additional variables need to be considered for a more complete graph.
# The three bar charts from the analysis, repeated in sequence.

# 1. Projected percent change per job title, bars ordered by PercentChange.
ggplot(
  combined_data,
  aes(x = reorder(JobTitle, PercentChange), y = PercentChange, fill = JobTitle)
) +
  geom_col() +
  coord_flip() + # swap the axes so the long job titles fit
  labs(
    title = "Top Computer Occupations by Job Growth (2022–2032)",
    x = "Job Title",
    y = "PercentChange (%)"
  ) +
  theme(legend.position = "none")

# 2. Median yearly pay per job title, bars ordered by MedianPayYearly.
ggplot(
  combined_data,
  aes(
    x = reorder(JobTitle, MedianPayYearly),
    y = MedianPayYearly,
    fill = JobTitle
  )
) +
  geom_col() +
  coord_flip() + # swap the axes so the long job titles fit
  scale_y_continuous(labels = scales::comma) + # thousands separators
  labs(
    title = "Top Computer Occupations by Median Pay (2022)",
    x = "Job Title",
    y = "Median Pay Yearly ($)"
  ) +
  theme(legend.position = "none")

# 3. Percent change for the occupations that clear either threshold
#    (growth above 25 or median pay above 120000).
combined_data %>%
  filter(PercentChange > 25 | MedianPayYearly > 120000) %>%
  ggplot(
    aes(
      x = reorder(JobTitle, PercentChange),
      y = PercentChange,
      fill = JobTitle
    )
  ) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top Computer Occupations by Median Pay or Growth (2022–2032)",
    x = "Job Title",
    y = "PercentChange (%)"
  ) +
  theme(legend.position = "none")

U.S. Bureau of Labor Statistics: https://www.bls.gov/ooh/computer-and-information-technology/
Projections Central: https://projectionscentral.org/longterm