library(readxl)
## Warning: package 'readxl' was built under R version 4.5.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(janitor)
## Warning: package 'janitor' was built under R version 4.5.3
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(tidyr)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
``` r
# Install only once
#install.packages(c("readxl", "dplyr", "janitor"))
# Load libraries
library(readxl)
library(dplyr)
library(janitor)
install.packages("readxl")
## Warning: package 'readxl' is in use and will not be installed
library(readxl)
# File paths
library(readxl)
genai_file <- "C:/Users/sarat/Downloads/JSA Gen AI Capacity Study AP Publication Chart Data 20250930.xlsx"
employment_projection_file <- "C:/Users/sarat/Downloads/employment_projections_-_may_2025_to_may_2035.xlsx"
shortage_file <- "C:/Users/sarat/Downloads/2025 Occupation Shortage List - 6 digit ANZSCO and OSCA.xlsx"
tnv_file <- "C:/Users/sarat/Downloads/TNV Data Feb 2026.xlsx"
vet_file <- "C:/Users/sarat/Downloads/vnda_2020-21_graduate_outcomes.xlsx"
# Check sheet names
excel_sheets(genai_file)
## [1] "Contents" "Cover" "APA Figure 1"
## [4] "APA Figure 2" "APA Figure 3" "APA Figure 4"
## [7] "APA Figure 5" "APA Figure 6" "APA Figure 7"
## [10] "APA Figure 8" "APA Figure 9" "APA Figure 10"
## [13] "APA Figure 11" "APA Figure 12" "APA Table 1"
## [16] "APA Table 2" "APA Table 3" "APA Table 4"
## [19] "APA Box 1" "APB Figure 1" "APB Figure 2"
## [22] "APB Figure 3" "APB Table 1" "APB Table 2"
## [25] "APB Table 3" "APB Box 1" "APC Figure 1"
## [28] "APC Figure 2" "APC Figure 3" "APC Table 1"
## [31] "APC Table 2" "APC Table 3" "APC Box 1"
## [34] "APC Box 2" "APD Figure 1" "APD Figure 2"
## [37] "APD Figure 3" "APD Figure 4" "APD Figure 5"
## [40] "APD Table 1" "APD Table 2" "APD Table 3"
## [43] "APD Table 4" "APD Table 5" "APD Table 6"
## [46] "APD Table 7" "APD Box 1" "APD Box 2"
## [49] "APD Box 3" "APE Figure 1" "APE Figure 2"
## [52] "APE Figure 3" "APE Figure 4" "APE Figure 5"
## [55] "APE Figure 6" "APE Figure 7" "APE Figure 8"
## [58] "APE Figure 9" "APE Figure 10" "APE Figure 11"
## [61] "APE Figure 12" "APE Figure 13" "APE Figure 14"
## [64] "APE Figure 15" "APE Figure 16" "APE Figure 17"
## [67] "APE Figure 18" "APE Figure 19" "APE Figure 20"
## [70] "APE Figure 21" "APE Figure 22" "APE Table 1"
## [73] "APE Table 2" "APE Box 1" "APE Box 2"
## [76] "APE Box 3" "APE Box 4" "APE Box 5"
## [79] "APF Figure 1" "APF Figure 2" "APF Figure 3"
## [82] "APF Figure 4" "APF Figure 5" "APF Figure 6"
## [85] "APF Table 1" "APF Table 2" "APF Table 3"
## [88] "APF Table 4" "APF Table 5" "APF Table 6"
## [91] "APF Table 7" "APF Box 1" "APF Box 2"
## [94] "APF Box 3" "CASE STUDY Table 1"
excel_sheets(employment_projection_file)
## [1] "Contents" "Data_Dictionary"
## [3] "Table_1 Industry Division" "Table_2 Major Occupation"
## [5] "Table_3 Skill Level" "Table_4 State & Territory"
## [7] "Table_5 Industry Group" "Table_6 Occupation Unit Group"
excel_sheets(shortage_file)
## [1] "Contents" "2025 OSL (ANZSCO 2022)" "2025 OSL (OSCA 2024)"
excel_sheets(tnv_file)
## [1] "Contents" "Table_1" "Data_1" "Table_2" "Data_2"
## [6] "Concordance"
excel_sheets(vet_file)
## [1] "Contents" "Data_Dictionary"
## [3] "1. National and cohort results" "2. State results"
## [5] "3. AQF results" "4. AQF-FOE (National) results"
## [7] "5. AQF-FOE (State) results" "6. Qualification results"
## [9] "7. Qualification by occupation"
# 1. GenAI data
# APA Figure 2 has occupation-level automation and augmentation scores
# Load only the required packages
library(readxl)
library(janitor)
library(dplyr)
library(ggplot2)
library(plotly)
# Load APA Figure 2
genai_occupation <- read_excel(
path = genai_file,
sheet = "APA Figure 2",
skip = 3
)
## New names:
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
# Clean column names
genai_occupation <- janitor::clean_names(genai_occupation)
# Check if object exists
exists("genai_occupation")
## [1] TRUE
# Check column names
names(genai_occupation)
## [1] "automation_score" "augmentation_score" "anzsco_code"
## [4] "anzsco_unit_title" "colour_theme" "x6"
## [7] "x7" "x8" "x9"
# View first rows
head(genai_occupation)
## # A tibble: 6 × 9
## automation_score augmentation_score anzsco_code anzsco_unit_title colour_theme
## <chr> <dbl> <dbl> <chr> <chr>
## 1 0.26 0.67 1111 Chief Executives… #570408
## 2 0.31 0.66 1112 General Managers #570408
## 3 0.26 0.63 1113 Legislators #570408
## 4 0.41 0.66 1211 Aquaculture Farm… #570408
## 5 0.34 0.67 1212 Crop Farmers #570408
## 6 0.34 0.65 1213 Livestock Farmers #570408
## # ℹ 4 more variables: x6 <lgl>, x7 <lgl>, x8 <lgl>, x9 <chr>
# Check dimensions
dim(genai_occupation)
## [1] 359 9
genai_occupation_clean <- genai_occupation %>%
select(
automation_score,
augmentation_score,
anzsco_code,
anzsco_unit_title,
colour_theme
) %>%
mutate(
automation_score = as.numeric(automation_score),
augmentation_score = as.numeric(augmentation_score),
anzsco_code = as.character(anzsco_code),
genai_exposure = automation_score + augmentation_score
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `automation_score = as.numeric(automation_score)`.
## Caused by warning:
## ! NAs introduced by coercion
# Check cleaned data
head(genai_occupation_clean)
## # A tibble: 6 × 6
## automation_score augmentation_score anzsco_code anzsco_unit_title colour_theme
## <dbl> <dbl> <chr> <chr> <chr>
## 1 0.26 0.67 1111 Chief Executives… #570408
## 2 0.31 0.66 1112 General Managers #570408
## 3 0.26 0.63 1113 Legislators #570408
## 4 0.41 0.66 1211 Aquaculture Farm… #570408
## 5 0.34 0.67 1212 Crop Farmers #570408
## 6 0.34 0.65 1213 Livestock Farmers #570408
## # ℹ 1 more variable: genai_exposure <dbl>
summary(genai_occupation_clean)
## automation_score augmentation_score anzsco_code anzsco_unit_title
## Min. :0.1000 Min. :0.3100 Length:359 Length:359
## 1st Qu.:0.2400 1st Qu.:0.5600 Class :character Class :character
## Median :0.3300 Median :0.6500 Mode :character Mode :character
## Mean :0.3516 Mean :0.6221
## 3rd Qu.:0.4400 3rd Qu.:0.7000
## Max. :0.8100 Max. :0.7900
## NA's :2 NA's :2
## colour_theme genai_exposure
## Length:359 Min. :0.4100
## Class :character 1st Qu.:0.8000
## Mode :character Median :0.9800
## Mean :0.9737
## 3rd Qu.:1.1400
## Max. :1.5700
## NA's :2
dim(genai_occupation_clean)
## [1] 359 6
chart1_data <- genai_occupation_clean %>%
arrange(desc(genai_exposure)) %>%
slice_head(n = 15)
chart1_data
## # A tibble: 15 × 6
## automation_score augmentation_score anzsco_code anzsco_unit_title
## <dbl> <dbl> <chr> <chr>
## 1 0.81 0.76 5321 Keyboard Operators
## 2 0.81 0.76 6393 Telemarketers
## 3 0.76 0.74 5613 Filing and Registry Clerks
## 4 0.76 0.73 5994 Human Resource Clerks
## 5 0.73 0.75 4516 Tourism and Travel Advisers
## 6 0.75 0.73 5411 Call or Contact Centre Worke…
## 7 0.69 0.77 2222 Financial Dealers
## 8 0.69 0.77 5512 Bookkeepers
## 9 0.71 0.74 5511 Accounting Clerks
## 10 0.68 0.75 5999 Other Miscellaneous Clerical…
## 11 0.68 0.74 5523 Insurance, Money Market and …
## 12 0.63 0.77 2613 Software and Applications Pr…
## 13 0.71 0.69 5611 Betting Clerks
## 14 0.71 0.68 5311 General Clerks
## 15 0.68 0.71 5522 Credit and Loans Officers
## # ℹ 2 more variables: colour_theme <chr>, genai_exposure <dbl>
genai_occupation_clean <- genai_occupation %>%
select(
automation_score,
augmentation_score,
anzsco_code,
anzsco_unit_title,
colour_theme
) %>%
mutate(
automation_score = as.numeric(automation_score),
augmentation_score = as.numeric(augmentation_score),
anzsco_code = as.character(anzsco_code),
genai_exposure = automation_score + augmentation_score
) %>%
filter(
!is.na(automation_score),
!is.na(augmentation_score),
!is.na(genai_exposure)
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `automation_score = as.numeric(automation_score)`.
## Caused by warning:
## ! NAs introduced by coercion
# Recreate top 15 chart data
chart1_data <- genai_occupation_clean %>%
arrange(desc(genai_exposure)) %>%
slice_head(n = 15)
chart1 <- ggplot(
chart1_data,
aes(
x = reorder(anzsco_unit_title, genai_exposure),
y = genai_exposure,
text = paste0(
"Occupation: ", anzsco_unit_title,
"<br>ANZSCO Code: ", anzsco_code,
"<br>Automation Score: ", round(automation_score, 2),
"<br>Augmentation Score: ", round(augmentation_score, 2),
"<br>Total GenAI Exposure: ", round(genai_exposure, 2)
)
)
) +
geom_col(fill = "#007377") +
coord_flip() +
labs(
title = "Top occupations most exposed to GenAI",
subtitle = "Top 15 occupations ranked by combined automation and augmentation exposure",
x = NULL,
y = "Combined GenAI exposure score",
caption = "Source: Jobs and Skills Australia, Generative AI Capacity Study"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10),
axis.text.y = element_text(size = 9),
plot.caption = element_text(size = 8),
plot.margin = margin(10, 20, 10, 10)
)
ggplotly(chart1, tooltip = "text")
# Chart 2 data
chart2_data <- genai_occupation_clean %>%
filter(
!is.na(automation_score),
!is.na(augmentation_score)
)
# Make sure packages are loaded
library(dplyr)
library(ggplot2)
library(plotly)
# Recreate Chart 2 data
chart2_data <- genai_occupation_clean %>%
filter(
!is.na(automation_score),
!is.na(augmentation_score),
!is.na(genai_exposure)
)
# Check chart2_data exists
exists("chart2_data")
## [1] TRUE
dim(chart2_data)
## [1] 357 6
head(chart2_data)
## # A tibble: 6 × 6
## automation_score augmentation_score anzsco_code anzsco_unit_title colour_theme
## <dbl> <dbl> <chr> <chr> <chr>
## 1 0.26 0.67 1111 Chief Executives… #570408
## 2 0.31 0.66 1112 General Managers #570408
## 3 0.26 0.63 1113 Legislators #570408
## 4 0.41 0.66 1211 Aquaculture Farm… #570408
## 5 0.34 0.67 1212 Crop Farmers #570408
## 6 0.34 0.65 1213 Livestock Farmers #570408
## # ℹ 1 more variable: genai_exposure <dbl>
chart2 <- ggplot(
chart2_data,
aes(
x = automation_score,
y = augmentation_score,
text = paste0(
"Occupation: ", anzsco_unit_title,
"<br>ANZSCO Code: ", anzsco_code,
"<br>Automation Score: ", round(automation_score, 2),
"<br>Augmentation Score: ", round(augmentation_score, 2),
"<br>Total GenAI Exposure: ", round(genai_exposure, 2)
)
)
) +
geom_point(alpha = 0.55, size = 2.1, colour = "#007377") +
geom_vline(
xintercept = median(chart2_data$automation_score, na.rm = TRUE),
linetype = "dashed",
colour = "grey50"
) +
geom_hline(
yintercept = median(chart2_data$augmentation_score, na.rm = TRUE),
linetype = "dashed",
colour = "grey50"
) +
labs(
title = "Exposure does not mean replacement",
subtitle = "Each dot represents one occupation; scores compare automation pressure with augmentation potential",
x = "Automation exposure score",
y = "Augmentation exposure score",
caption = "Source: Jobs and Skills Australia, Generative AI Capacity Study"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10),
plot.caption = element_text(size = 8),
plot.margin = margin(10, 20, 10, 10)
)
ggplotly(chart2, tooltip = "text")
# Load required packages
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
# Create occupation groups using first digit of ANZSCO code
chart3_data <- genai_occupation_clean %>%
mutate(
anzsco_major_group_code = substr(anzsco_code, 1, 1),
occupation_group = case_when(
anzsco_major_group_code == "1" ~ "Managers",
anzsco_major_group_code == "2" ~ "Professionals",
anzsco_major_group_code == "3" ~ "Technicians and Trades Workers",
anzsco_major_group_code == "4" ~ "Community and Personal Service Workers",
anzsco_major_group_code == "5" ~ "Clerical and Administrative Workers",
anzsco_major_group_code == "6" ~ "Sales Workers",
anzsco_major_group_code == "7" ~ "Machinery Operators and Drivers",
anzsco_major_group_code == "8" ~ "Labourers",
TRUE ~ "Other"
)
) %>%
group_by(occupation_group) %>%
summarise(
avg_automation = mean(automation_score, na.rm = TRUE),
avg_augmentation = mean(augmentation_score, na.rm = TRUE),
avg_genai_exposure = mean(genai_exposure, na.rm = TRUE),
number_of_occupations = n(),
.groups = "drop"
) %>%
arrange(desc(avg_genai_exposure))
# Check Chart 3 data
chart3_data
## # A tibble: 8 × 5
## occupation_group avg_automation avg_augmentation avg_genai_exposure
## <chr> <dbl> <dbl> <dbl>
## 1 Clerical and Administrativ… 0.600 0.692 1.29
## 2 Sales Workers 0.475 0.651 1.13
## 3 Managers 0.395 0.691 1.09
## 4 Professionals 0.391 0.693 1.08
## 5 Community and Personal Ser… 0.296 0.571 0.867
## 6 Technicians and Trades Wor… 0.269 0.595 0.863
## 7 Machinery Operators and Dr… 0.275 0.548 0.823
## 8 Labourers 0.196 0.456 0.652
## # ℹ 1 more variable: number_of_occupations <int>
chart3_long <- chart3_data %>%
select(
occupation_group,
avg_automation,
avg_augmentation
) %>%
pivot_longer(
cols = c(avg_automation, avg_augmentation),
names_to = "exposure_type",
values_to = "score"
) %>%
mutate(
exposure_type = case_when(
exposure_type == "avg_automation" ~ "Automation exposure",
exposure_type == "avg_augmentation" ~ "Augmentation exposure",
TRUE ~ exposure_type
)
)
chart3_long
## # A tibble: 16 × 3
## occupation_group exposure_type score
## <chr> <chr> <dbl>
## 1 Clerical and Administrative Workers Automation exposure 0.600
## 2 Clerical and Administrative Workers Augmentation exposure 0.692
## 3 Sales Workers Automation exposure 0.475
## 4 Sales Workers Augmentation exposure 0.651
## 5 Managers Automation exposure 0.395
## 6 Managers Augmentation exposure 0.691
## 7 Professionals Automation exposure 0.391
## 8 Professionals Augmentation exposure 0.693
## 9 Community and Personal Service Workers Automation exposure 0.296
## 10 Community and Personal Service Workers Augmentation exposure 0.571
## 11 Technicians and Trades Workers Automation exposure 0.269
## 12 Technicians and Trades Workers Augmentation exposure 0.595
## 13 Machinery Operators and Drivers Automation exposure 0.275
## 14 Machinery Operators and Drivers Augmentation exposure 0.548
## 15 Labourers Automation exposure 0.196
## 16 Labourers Augmentation exposure 0.456
chart3 <- ggplot(
chart3_long,
aes(
x = reorder(occupation_group, score),
y = score,
fill = exposure_type,
text = paste0(
"Occupation group: ", occupation_group,
"<br>Exposure type: ", exposure_type,
"<br>Average score: ", round(score, 2)
)
)
) +
geom_col(position = "dodge") +
coord_flip() +
labs(
title = "AI exposure by occupation group",
subtitle = "Average automation and augmentation exposure by major occupation group",
x = NULL,
y = "Average exposure score",
fill = "Exposure type",
caption = "Source: Jobs and Skills Australia, Generative AI Capacity Study"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10),
plot.caption = element_text(size = 8),
legend.position = "bottom",
plot.margin = margin(10, 20, 10, 10)
)
ggplotly(chart3, tooltip = "text")
chart3 <- ggplot(
chart3_long,
aes(
x = reorder(occupation_group, score),
y = score,
fill = exposure_type,
text = paste0(
"Occupation group: ", occupation_group,
"<br>Exposure type: ", exposure_type,
"<br>Average score: ", round(score, 2)
)
)
) +
geom_col(position = "dodge") +
coord_flip() +
labs(
title = "AI exposure by occupation group",
subtitle = "Average automation and augmentation scores across major ANZSCO groups",
x = NULL,
y = "Average exposure score",
fill = "Exposure type",
caption = "Source: Jobs and Skills Australia, Generative AI Capacity Study"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10),
plot.caption = element_text(size = 8),
legend.position = "bottom",
plot.margin = margin(10, 20, 10, 10)
)
ggplotly(chart3, tooltip = "text")
employment_projection <- read_excel(
path = employment_projection_file,
sheet = "Table_6 Occupation Unit Group",
skip = 3
) %>%
clean_names()
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
# 2. Employment projections
library(readxl)
library(dplyr)
library(janitor)
employment_projection <- read_excel(
path = employment_projection_file,
sheet = "Table_6 Occupation Unit Group",
skip = 7
) %>%
clean_names()
## New names:
## • `Projected` -> `Projected...7`
## • `Projected` -> `Projected...8`
## • `` -> `...10`
## • `` -> `...12`
names(employment_projection)
## [1] "occupation_level" "nfd_indicator" "anzsco_code" "occupation"
## [5] "skill_level_1" "baseline" "projected_7" "projected_8"
## [9] "x5_year_change" "x10" "x10_year_change" "x12"
head(employment_projection)
## # A tibble: 6 × 12
## occupation_level nfd_indicator anzsco_code occupation skill_level_1 baseline
## <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 <NA> <NA> NA <NA> <NA> May 202…
## 2 1 N 1 Managers - 1872.74…
## 3 2 Y 10 Managers nfd - 6.15635…
## 4 3 Y 100 Managers nfd - 6.15635…
## 5 4 Y 1000 Managers nfd 1 6.15635…
## 6 2 N 11 Chief Execu… - 135.498…
## # ℹ 6 more variables: projected_7 <chr>, projected_8 <chr>,
## # x5_year_change <chr>, x10 <chr>, x10_year_change <chr>, x12 <chr>
dim(employment_projection)
## [1] 670 12
employment_projection_raw <- read_excel(
path = employment_projection_file,
sheet = "Table_6 Occupation Unit Group",
skip = 8,
col_names = FALSE
)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
employment_projection <- employment_projection_raw %>%
setNames(c(
"occupation_level",
"nfd_level",
"anzsco_code",
"occupation_title",
"skill_level",
"base_employment_2025",
"projected_employment_2030",
"projected_employment_2035",
"five_year_change_level",
"five_year_change_percent",
"ten_year_change_level",
"ten_year_change_percent"
)) %>%
clean_names()
head(employment_projection)
## # A tibble: 6 × 12
## occupation_level nfd_level anzsco_code occupation_title skill_level
## <chr> <chr> <dbl> <chr> <chr>
## 1 <NA> <NA> NA <NA> <NA>
## 2 1 N 1 Managers -
## 3 2 Y 10 Managers nfd -
## 4 3 Y 100 Managers nfd -
## 5 4 Y 1000 Managers nfd 1
## 6 2 N 11 Chief Executives, General … -
## # ℹ 7 more variables: base_employment_2025 <chr>,
## # projected_employment_2030 <chr>, projected_employment_2035 <chr>,
## # five_year_change_level <chr>, five_year_change_percent <chr>,
## # ten_year_change_level <chr>, ten_year_change_percent <chr>
names(employment_projection)
## [1] "occupation_level" "nfd_level"
## [3] "anzsco_code" "occupation_title"
## [5] "skill_level" "base_employment_2025"
## [7] "projected_employment_2030" "projected_employment_2035"
## [9] "five_year_change_level" "five_year_change_percent"
## [11] "ten_year_change_level" "ten_year_change_percent"
dim(employment_projection)
## [1] 670 12
employment_projection_clean <- employment_projection %>%
mutate(
anzsco_code = as.character(anzsco_code),
base_employment_2025 = as.numeric(base_employment_2025),
projected_employment_2030 = as.numeric(projected_employment_2030),
projected_employment_2035 = as.numeric(projected_employment_2035),
five_year_change_level = as.numeric(five_year_change_level),
five_year_change_percent = as.numeric(five_year_change_percent),
ten_year_change_level = as.numeric(ten_year_change_level),
ten_year_change_percent = as.numeric(ten_year_change_percent)
) %>%
filter(
!is.na(anzsco_code),
nchar(anzsco_code) == 4,
!is.na(ten_year_change_percent)
)
## Warning: There were 7 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `base_employment_2025 = as.numeric(base_employment_2025)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 6 remaining warnings.
head(employment_projection_clean)
## # A tibble: 6 × 12
## occupation_level nfd_level anzsco_code occupation_title skill_level
## <chr> <chr> <chr> <chr> <chr>
## 1 4 Y 1000 Managers nfd 1
## 2 4 Y 1110 Chief Executives, General … 1
## 3 4 N 1111 Chief Executives and Manag… 1
## 4 4 N 1112 General Managers 1
## 5 4 N 1113 Legislators 1
## 6 4 Y 1210 Farmers and Farm Managers … 1
## # ℹ 7 more variables: base_employment_2025 <dbl>,
## # projected_employment_2030 <dbl>, projected_employment_2035 <dbl>,
## # five_year_change_level <dbl>, five_year_change_percent <dbl>,
## # ten_year_change_level <dbl>, ten_year_change_percent <dbl>
dim(employment_projection_clean)
## [1] 474 12
names(employment_projection_clean)
## [1] "occupation_level" "nfd_level"
## [3] "anzsco_code" "occupation_title"
## [5] "skill_level" "base_employment_2025"
## [7] "projected_employment_2030" "projected_employment_2035"
## [9] "five_year_change_level" "five_year_change_percent"
## [11] "ten_year_change_level" "ten_year_change_percent"
head(employment_projection_clean)
## # A tibble: 6 × 12
## occupation_level nfd_level anzsco_code occupation_title skill_level
## <chr> <chr> <chr> <chr> <chr>
## 1 4 Y 1000 Managers nfd 1
## 2 4 Y 1110 Chief Executives, General … 1
## 3 4 N 1111 Chief Executives and Manag… 1
## 4 4 N 1112 General Managers 1
## 5 4 N 1113 Legislators 1
## 6 4 Y 1210 Farmers and Farm Managers … 1
## # ℹ 7 more variables: base_employment_2025 <dbl>,
## # projected_employment_2030 <dbl>, projected_employment_2035 <dbl>,
## # five_year_change_level <dbl>, five_year_change_percent <dbl>,
## # ten_year_change_level <dbl>, ten_year_change_percent <dbl>
dim(employment_projection_clean)
## [1] 474 12
# Join GenAI occupation data with Employment Projections
chart4_data <- genai_occupation_clean %>%
inner_join(
employment_projection_clean,
by = "anzsco_code"
) %>%
filter(
!is.na(genai_exposure),
!is.na(ten_year_change_percent)
)
# Check joined data
head(chart4_data)
## # A tibble: 6 × 17
## automation_score augmentation_score anzsco_code anzsco_unit_title colour_theme
## <dbl> <dbl> <chr> <chr> <chr>
## 1 0.26 0.67 1111 Chief Executives… #570408
## 2 0.31 0.66 1112 General Managers #570408
## 3 0.26 0.63 1113 Legislators #570408
## 4 0.41 0.66 1211 Aquaculture Farm… #570408
## 5 0.34 0.67 1212 Crop Farmers #570408
## 6 0.34 0.65 1213 Livestock Farmers #570408
## # ℹ 12 more variables: genai_exposure <dbl>, occupation_level <chr>,
## # nfd_level <chr>, occupation_title <chr>, skill_level <chr>,
## # base_employment_2025 <dbl>, projected_employment_2030 <dbl>,
## # projected_employment_2035 <dbl>, five_year_change_level <dbl>,
## # five_year_change_percent <dbl>, ten_year_change_level <dbl>,
## # ten_year_change_percent <dbl>
dim(chart4_data)
## [1] 357 17
# Create grouped Chart 4 data
chart4_heatmap_data <- chart4_data %>%
mutate(
exposure_group = case_when(
genai_exposure < quantile(genai_exposure, 0.33, na.rm = TRUE) ~ "Low AI exposure",
genai_exposure < quantile(genai_exposure, 0.66, na.rm = TRUE) ~ "Medium AI exposure",
TRUE ~ "High AI exposure"
),
growth_group = case_when(
ten_year_change_percent <= 0 ~ "Declining or flat",
ten_year_change_percent <= median(ten_year_change_percent, na.rm = TRUE) ~ "Moderate growth",
TRUE ~ "Strong growth"
)
) %>%
group_by(exposure_group, growth_group) %>%
summarise(
occupation_count = n(),
avg_growth = mean(ten_year_change_percent, na.rm = TRUE),
avg_exposure = mean(genai_exposure, na.rm = TRUE),
.groups = "drop"
)
chart4_heatmap_data
## # A tibble: 9 × 5
## exposure_group growth_group occupation_count avg_growth avg_exposure
## <chr> <chr> <int> <dbl> <dbl>
## 1 High AI exposure Declining or flat 6 -0.0322 1.42
## 2 High AI exposure Moderate growth 40 0.0715 1.25
## 3 High AI exposure Strong growth 81 0.192 1.20
## 4 Low AI exposure Declining or flat 4 -0.0223 0.57
## 5 Low AI exposure Moderate growth 89 0.0545 0.692
## 6 Low AI exposure Strong growth 24 0.159 0.752
## 7 Medium AI exposure Declining or flat 3 -0.0131 0.973
## 8 Medium AI exposure Moderate growth 37 0.0708 0.966
## 9 Medium AI exposure Strong growth 73 0.210 0.975
chart4_heatmap_data <- chart4_heatmap_data %>%
mutate(
exposure_group = factor(
exposure_group,
levels = c("Low AI exposure", "Medium AI exposure", "High AI exposure")
),
growth_group = factor(
growth_group,
levels = c("Declining or flat", "Moderate growth", "Strong growth")
)
)
chart4_heatmap <- ggplot(
chart4_heatmap_data,
aes(
x = exposure_group,
y = growth_group,
fill = occupation_count,
text = paste0(
"AI exposure group: ", exposure_group,
"<br>Growth group: ", growth_group,
"<br>Number of occupations: ", occupation_count,
"<br>Average GenAI exposure: ", round(avg_exposure, 2),
"<br>Average projected growth: ", round(avg_growth, 2), "%"
)
)
) +
geom_tile(colour = "white", linewidth = 1) +
geom_text(aes(label = occupation_count), size = 5, fontface = "bold") +
labs(
title = "AI-exposed jobs are not all disappearing",
subtitle = "Number of occupations by GenAI exposure and projected employment growth to 2035",
x = "GenAI exposure level",
y = "Projected employment growth level",
fill = "Occupation count",
caption = "Source: Jobs and Skills Australia, Generative AI Capacity Study and Employment Projections"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10),
plot.caption = element_text(size = 8),
legend.position = "right",
plot.margin = margin(10, 20, 10, 10)
)
ggplotly(chart4_heatmap, tooltip = "text")
# 3. Occupation shortage list
shortage_list <- read_excel(shortage_file,
sheet = "2025 OSL (ANZSCO 2022)",
skip = 3) %>%
clean_names()
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
shortage_raw <- read_excel(
path = shortage_file,
sheet = "2025 OSL (ANZSCO 2022)",
skip = 7
) %>%
clean_names()
names(shortage_raw)
## [1] "occupation_code_anzsco_2022"
## [2] "occupation_title"
## [3] "ns_no_shortage_s_shortage_r_regional_shortage_m_metro_shortage"
## [4] "nsw"
## [5] "vic"
## [6] "qld"
## [7] "sa"
## [8] "wa"
## [9] "tas"
## [10] "nt"
## [11] "act"
## [12] "skill_level"
## [13] "major_occupation_group"
head(shortage_raw)
## # A tibble: 6 × 13
## occupation_code_anzsco_2…¹ occupation_title ns_no_shortage_s_sho…² nsw vic
## <dbl> <chr> <chr> <chr> <chr>
## 1 111111 Chief Executive… NS NS NS
## 2 111211 Corporate Gener… NS NS NS
## 3 121111 Aquaculture Far… NS NS NS
## 4 121311 Apiarist NS NS NS
## 5 121312 Beef Cattle Far… NS NS NS
## 6 121313 Dairy Cattle Fa… NS NS NS
## # ℹ abbreviated names: ¹occupation_code_anzsco_2022,
## # ²ns_no_shortage_s_shortage_r_regional_shortage_m_metro_shortage
## # ℹ 8 more variables: qld <chr>, sa <chr>, wa <chr>, tas <chr>, nt <chr>,
## # act <chr>, skill_level <dbl>, major_occupation_group <dbl>
dim(shortage_raw)
## [1] 916 13
shortage_clean <- shortage_raw %>%
rename(
shortage_rating = ns_no_shortage_s_shortage_r_regional_shortage_m_metro_shortage
) %>%
mutate(
anzsco_6_digit = as.character(occupation_code_anzsco_2022),
anzsco_code = substr(anzsco_6_digit, 1, 4),
shortage_status = case_when(
shortage_rating == "S" ~ "Shortage",
shortage_rating == "R" ~ "Regional shortage",
shortage_rating == "M" ~ "Metro shortage",
shortage_rating == "NS" ~ "No shortage",
TRUE ~ "Other"
),
is_shortage = ifelse(shortage_rating %in% c("S", "R", "M"), 1, 0)
) %>%
filter(
!is.na(anzsco_code),
nchar(anzsco_code) == 4
)
head(shortage_clean)
## # A tibble: 6 × 17
## occupation_code_anzsco_2022 occupation_title shortage_rating nsw vic qld
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 111111 Chief Executive… NS NS NS NS
## 2 111211 Corporate Gener… NS NS NS NS
## 3 121111 Aquaculture Far… NS NS NS NS
## 4 121311 Apiarist NS NS NS NS
## 5 121312 Beef Cattle Far… NS NS NS NS
## 6 121313 Dairy Cattle Fa… NS NS NS NS
## # ℹ 11 more variables: sa <chr>, wa <chr>, tas <chr>, nt <chr>, act <chr>,
## # skill_level <dbl>, major_occupation_group <dbl>, anzsco_6_digit <chr>,
## # anzsco_code <chr>, shortage_status <chr>, is_shortage <dbl>
dim(shortage_clean)
## [1] 916 17
shortage_summary <- shortage_clean %>%
group_by(anzsco_code) %>%
summarise(
shortage_count = sum(is_shortage, na.rm = TRUE),
total_6_digit_occupations = n(),
shortage_share = shortage_count / total_6_digit_occupations,
.groups = "drop"
)
head(shortage_summary)
## # A tibble: 6 × 4
## anzsco_code shortage_count total_6_digit_occupations shortage_share
## <chr> <dbl> <int> <dbl>
## 1 1111 0 1 0
## 2 1112 0 1 0
## 3 1211 0 1 0
## 4 1213 0 11 0
## 5 1215 0 4 0
## 6 1216 0 8 0
dim(shortage_summary)
## [1] 311 4
chart5_data <- genai_occupation_clean %>%
inner_join(shortage_summary, by = "anzsco_code") %>%
mutate(
exposure_group = case_when(
genai_exposure < quantile(genai_exposure, 0.33, na.rm = TRUE) ~ "Low AI exposure",
genai_exposure < quantile(genai_exposure, 0.66, na.rm = TRUE) ~ "Medium AI exposure",
TRUE ~ "High AI exposure"
),
shortage_group = case_when(
shortage_share == 0 ~ "No shortage",
shortage_share > 0 & shortage_share < 0.5 ~ "Partial shortage",
shortage_share >= 0.5 ~ "Strong shortage",
TRUE ~ "Unknown"
)
)
head(chart5_data)
## # A tibble: 6 × 11
## automation_score augmentation_score anzsco_code anzsco_unit_title colour_theme
## <dbl> <dbl> <chr> <chr> <chr>
## 1 0.26 0.67 1111 Chief Executives… #570408
## 2 0.31 0.66 1112 General Managers #570408
## 3 0.41 0.66 1211 Aquaculture Farm… #570408
## 4 0.34 0.65 1213 Livestock Farmers #570408
## 5 0.38 0.72 1311 Advertising, Pub… #570408
## 6 0.46 0.71 1321 Corporate Servic… #570408
## # ℹ 6 more variables: genai_exposure <dbl>, shortage_count <dbl>,
## # total_6_digit_occupations <int>, shortage_share <dbl>,
## # exposure_group <chr>, shortage_group <chr>
dim(chart5_data)
## [1] 297 11
chart5_heatmap_data <- chart5_data %>%
group_by(exposure_group, shortage_group) %>%
summarise(
occupation_count = n(),
avg_exposure = mean(genai_exposure, na.rm = TRUE),
avg_shortage_share = mean(shortage_share, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
exposure_group = factor(
exposure_group,
levels = c("Low AI exposure", "Medium AI exposure", "High AI exposure")
),
shortage_group = factor(
shortage_group,
levels = c("No shortage", "Partial shortage", "Strong shortage")
)
)
chart5_heatmap_data
## # A tibble: 9 × 5
## exposure_group shortage_group occupation_count avg_exposure avg_shortage_share
## <fct> <fct> <int> <dbl> <dbl>
## 1 High AI expos… No shortage 89 1.23 0
## 2 High AI expos… Partial short… 7 1.30 0.312
## 3 High AI expos… Strong shorta… 12 1.18 0.706
## 4 Low AI exposu… No shortage 32 0.746 0
## 5 Low AI exposu… Partial short… 9 0.774 0.268
## 6 Low AI exposu… Strong shorta… 56 0.755 0.903
## 7 Medium AI exp… No shortage 48 1.01 0
## 8 Medium AI exp… Partial short… 9 1.01 0.164
## 9 Medium AI exp… Strong shorta… 35 0.989 0.892
chart5 <- ggplot(
chart5_heatmap_data,
aes(
x = exposure_group,
y = shortage_group,
fill = occupation_count,
text = paste0(
"AI exposure group: ", exposure_group,
"<br>Shortage group: ", shortage_group,
"<br>Number of occupations: ", occupation_count,
"<br>Average GenAI exposure: ", round(avg_exposure, 2),
"<br>Average shortage share: ", round(avg_shortage_share * 100, 1), "%"
)
)
) +
geom_tile(colour = "white", linewidth = 1) +
geom_text(aes(label = occupation_count), size = 5, fontface = "bold") +
scale_fill_gradient(
low = "#FDB863",
high = "#D7301F"
) +
labs(
title = "Some AI-exposed jobs are still in shortage",
subtitle = "Occupation groups by GenAI exposure and 2025 national shortage status",
x = "GenAI exposure level",
y = "Shortage level",
fill = "Occupation count",
caption = "Source: Jobs and Skills Australia, Generative AI Capacity Study and 2025 Occupation Shortage List"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10),
plot.caption = element_text(size = 8),
legend.position = "right",
plot.margin = margin(10, 20, 10, 10)
)
ggplotly(chart5, tooltip = "text")