Load packages

library(dplyr)
library(ggplot2)
library(readxl)

Load data

Data source: https://www.bls.gov/emp/ind-occ-matrix/occupation.xlsx (saved locally as "occupation new.xlsx")

# Read sheet "Table 1.2" of the workbook. The first row returned by
# read_excel() holds the real column headers, so it is promoted to the
# column names and then removed from the data.
occupation <- read_excel("occupation new.xlsx", sheet = "Table 1.2")
colnames(occupation) <- occupation[1, ]

# Convert the columns used below to their working types and derive the
# absolute employment change (2030 level minus 2020 level).
occupation <- occupation %>%
  slice(-1) %>%
  mutate(
    `Occupation type` = as.factor(`Occupation type`),
    across(
      c(
        `Employment, 2030`,
        `Employment, 2020`,
        `Percent employment change, 2020-30`
      ),
      as.numeric
    ),
    increase_to_2030 = `Employment, 2030` - `Employment, 2020`
  )

Exploratory data analysis

# Rows whose occupation type is "Summary", reduced to the title and the two
# change measures, sorted from the largest to the smallest absolute increase.
occupation %>%
  filter(`Occupation type` == "Summary") %>%
  select(
    `2020 National Employment Matrix title`,
    increase_to_2030,
    `Percent employment change, 2020-30`
  ) %>%
  arrange(desc(increase_to_2030))
## # A tibble: 258 × 3
##    `2020 National Employment Matrix title`     increase_to_2030 `Percent emplo…`
##    <chr>                                                  <dbl>            <dbl>
##  1 Total, all occupations                                11880.              7.7
##  2 Food preparation and serving related occup…            2268.             19.6
##  3 Healthcare support occupations                         1580.             23.1
##  4 Home health and personal care aides; and n…            1252.             25.2
##  5 Transportation and material moving occupat…            1120.              8.8
##  6 Food and beverage serving workers                      1120.             18  
##  7 Healthcare practitioners and technical occ…             974.             10.8
##  8 Educational instruction and library occupa…             920.             10.1
##  9 Management occupations                                  907.              9.3
## 10 Personal care and service occupations                   841              21.7
## # … with 248 more rows
# Rows whose occupation type is "Line item", reduced to the title and the two
# change measures, sorted from the largest to the smallest absolute increase.
# Stored as `df` for the charts that follow.
df <- occupation %>%
  filter(`Occupation type` == "Line item") %>%
  select(
    `2020 National Employment Matrix title`,
    increase_to_2030,
    `Percent employment change, 2020-30`
  ) %>%
  arrange(desc(increase_to_2030))
# Horizontal bar chart of the line-item occupations whose absolute employment
# increase exceeds 100 (the chart title states the values are in thousands).
# Occupation titles are turned into a factor ordered by the increase so the
# bars appear sorted rather than alphabetical.
df %>%
  filter(increase_to_2030 > 100) %>%
  mutate(
    `2020 National Employment Matrix title` = reorder(
      `2020 National Employment Matrix title`,
      increase_to_2030
    )
  ) %>%
  ggplot(aes(`2020 National Employment Matrix title`, increase_to_2030)) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity").
  geom_col(fill = "orange", colour = "red") +
  coord_flip() +
  # NOTE(review): ylim() discards observations outside 0-1200; raise the
  # upper bound if a future data release contains a larger increase.
  ylim(0, 1200) +
  # hjust = 0 starts each value label at the end of its bar.
  geom_text(aes(label = increase_to_2030), hjust = 0, size = 2) +
  labs(
    # The x aesthetic carries the occupation titles, so label it as such
    # (it was previously labelled "Employment").
    x = "Occupation",
    y = "Employment change, 2020-30",
    title = "Employment change, 2020-30 in thousands",
    subtitle = "from 2020 to 2030 in the US by job type above 100,000"
  )

# Horizontal bar chart of the line-item occupations whose percent employment
# change from 2020 to 2030 exceeds 30. Occupation titles are turned into a
# factor ordered by the percent change so the bars appear sorted rather than
# alphabetical.
df %>%
  filter(`Percent employment change, 2020-30` > 30) %>%
  mutate(
    `2020 National Employment Matrix title` = reorder(
      `2020 National Employment Matrix title`,
      `Percent employment change, 2020-30`
    )
  ) %>%
  ggplot(
    aes(
      `2020 National Employment Matrix title`,
      `Percent employment change, 2020-30`
    )
  ) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity").
  geom_col(fill = "red", colour = "orange") +
  coord_flip() +
  # NOTE(review): ylim() discards observations outside 0-80; raise the upper
  # bound if a future data release contains a larger percent change.
  ylim(0, 80) +
  # hjust = 0 starts each value label at the end of its bar.
  geom_text(
    aes(label = `Percent employment change, 2020-30`),
    hjust = 0,
    size = 3
  ) +
  labs(
    # The x aesthetic carries the occupation titles, so label it as such
    # (it was previously labelled "Employment").
    x = "Occupation",
    y = "Percent employment change, 2020-30",
    title = "Percent employment change, 2020-30",
    subtitle = "from 2020 to 2030 in the US by job type above 30% "
  )