#ANgel Gallardo

analyze nyc monthly jobs by industry for the last 34 years. Which industry provide more stability? WHat is the growth by industry for the last 10 years?

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
nycemp <- read.csv("https://raw.githubusercontent.com/Angelogallardo05/Week6/main/nychist.csv")
head(nycemp)
##    AREA YEAR SERIESCODE      AREANAME                   INDUSTRY_TITLE     JAN
## 1 93561 2023          0 New York City                    Total Nonfarm 4,575.1
## 2 93561 2023    5000000 New York City                    Total Private 4,004.1
## 3 93561 2023    6000000 New York City                  Goods Producing   197.7
## 4 93561 2023    7000000 New York City                Service-Providing 4,377.4
## 5 93561 2023    8000000 New York City        Private Service Providing 3,806.4
## 6 93561 2023   15000000 New York City Mining, Logging and Construction   141.6
##       FEB     MAR     APR     MAY     JUN     JUL     AUG     SEP     OCT
## 1 4,623.3 4,649.7 4,647.8 4,679.5 4,695.2 4,681.4 4,665.2 4,690.1 4,725.7
## 2 4,052.3 4,075.3 4,076.5 4,107.7 4,126.6 4,103.1 4,088.7 4,113.5 4,149.3
## 3   202.5   203.8   206.6   211.0   213.8   215.1   215.4   217.5   219.5
## 4 4,420.8 4,445.9 4,441.2 4,468.5 4,481.4 4,466.3 4,449.8 4,472.6 4,506.2
## 5 3,849.8 3,871.5 3,869.9 3,896.7 3,912.8 3,888.0 3,873.3 3,896.0 3,929.8
## 6   145.6   146.4   149.2   153.5   155.7   157.7   157.9   159.8   161.7
##       NOV     DEC ANNUAL  X JAN.1 FEB.1 MAR.1 APR.1 MAY.1 JUN.1 JUL.1 AUG.1
## 1 4,743.9 4,756.6        NA 215.4 190.2 181.0 150.6 159.6 149.1  50.4  39.7
## 2 4,171.2 4,181.5        NA 211.0 185.9 176.6 145.7 155.0 144.8 127.6 116.8
## 3   212.0   210.5        NA   5.3   7.2   6.4   6.4   9.6  10.1  13.0  13.2
## 4 4,531.9 4,546.1        NA 210.1 183.0 174.6 144.2 150.0 139.0  37.4  26.5
## 5 3,959.2 3,971.0        NA 205.7 178.7 170.2 139.3 145.4 134.7 114.6 103.6
## 6   154.4   153.7        NA   4.9   7.0   5.9   6.6  10.0  10.4  13.6  13.5
##   SEP.1 OCT.1 NOV.1 DEC.1 AVG X.1 JAN.2 FEB.2 MAR.2 APR.2 MAY.2 JUN.2 JUL.2
## 1 127.0  95.9  72.6  53.6  NA  NA  4.9%  4.3%  4.1%  3.3%  3.5%  3.3%  1.1%
## 2 112.3  91.2  73.6  54.9  NA  NA  5.6%  4.8%  4.5%  3.7%  3.9%  3.6%  3.2%
## 3  14.8  16.0   5.1   5.8  NA  NA  2.8%  3.7%  3.2%  3.2%  4.8%  5.0%  6.4%
## 4 112.2  79.9  67.5  47.8  NA  NA  5.0%  4.3%  4.1%  3.4%  3.5%  3.2%  0.8%
## 5  97.5  75.2  68.5  49.1  NA  NA  5.7%  4.9%  4.6%  3.7%  3.9%  3.6%  3.0%
## 6  15.0  16.3   6.1   7.5  NA  NA  3.6%  5.1%  4.2%  4.6%  7.0%  7.2%  9.4%
##   AUG.2 SEP.2 OCT.2 NOV.2 DEC.2 AVG.1 X.2
## 1  0.9%  2.8%  2.1%  1.6%  1.1%        NA
## 2  2.9%  2.8%  2.2%  1.8%  1.3%        NA
## 3  6.5%  7.3%  7.9%  2.5%  2.8%        NA
## 4  0.6%  2.6%  1.8%  1.5%  1.1%        NA
## 5  2.7%  2.6%  2.0%  1.8%  1.3%        NA
## 6  9.3% 10.4% 11.2%  4.1%  5.1%        NA

make the columns with numbers as numeric.

nycemp <- nycemp %>%
  mutate(across(starts_with("JAN"):ends_with("ANNUAL"), ~as.numeric(gsub(",", "", .))))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(...)`.
## Caused by warning in `x:y`:
## ! numerical expression has 3 elements: only the first used
head(nycemp)
##    AREA YEAR SERIESCODE      AREANAME                   INDUSTRY_TITLE    JAN
## 1 93561 2023          0 New York City                    Total Nonfarm 4575.1
## 2 93561 2023    5000000 New York City                    Total Private 4004.1
## 3 93561 2023    6000000 New York City                  Goods Producing  197.7
## 4 93561 2023    7000000 New York City                Service-Providing 4377.4
## 5 93561 2023    8000000 New York City        Private Service Providing 3806.4
## 6 93561 2023   15000000 New York City Mining, Logging and Construction  141.6
##      FEB    MAR    APR    MAY    JUN    JUL    AUG    SEP    OCT    NOV    DEC
## 1 4623.3 4649.7 4647.8 4679.5 4695.2 4681.4 4665.2 4690.1 4725.7 4743.9 4756.6
## 2 4052.3 4075.3 4076.5 4107.7 4126.6 4103.1 4088.7 4113.5 4149.3 4171.2 4181.5
## 3  202.5  203.8  206.6  211.0  213.8  215.1  215.4  217.5  219.5  212.0  210.5
## 4 4420.8 4445.9 4441.2 4468.5 4481.4 4466.3 4449.8 4472.6 4506.2 4531.9 4546.1
## 5 3849.8 3871.5 3869.9 3896.7 3912.8 3888.0 3873.3 3896.0 3929.8 3959.2 3971.0
## 6  145.6  146.4  149.2  153.5  155.7  157.7  157.9  159.8  161.7  154.4  153.7
##   ANNUAL  X JAN.1 FEB.1 MAR.1 APR.1 MAY.1 JUN.1 JUL.1 AUG.1 SEP.1 OCT.1 NOV.1
## 1     NA NA 215.4 190.2 181.0 150.6 159.6 149.1  50.4  39.7 127.0  95.9  72.6
## 2     NA NA 211.0 185.9 176.6 145.7 155.0 144.8 127.6 116.8 112.3  91.2  73.6
## 3     NA NA   5.3   7.2   6.4   6.4   9.6  10.1  13.0  13.2  14.8  16.0   5.1
## 4     NA NA 210.1 183.0 174.6 144.2 150.0 139.0  37.4  26.5 112.2  79.9  67.5
## 5     NA NA 205.7 178.7 170.2 139.3 145.4 134.7 114.6 103.6  97.5  75.2  68.5
## 6     NA NA   4.9   7.0   5.9   6.6  10.0  10.4  13.6  13.5  15.0  16.3   6.1
##   DEC.1 AVG X.1 JAN.2 FEB.2 MAR.2 APR.2 MAY.2 JUN.2 JUL.2 AUG.2 SEP.2 OCT.2
## 1  53.6  NA  NA  4.9%  4.3%  4.1%  3.3%  3.5%  3.3%  1.1%  0.9%  2.8%  2.1%
## 2  54.9  NA  NA  5.6%  4.8%  4.5%  3.7%  3.9%  3.6%  3.2%  2.9%  2.8%  2.2%
## 3   5.8  NA  NA  2.8%  3.7%  3.2%  3.2%  4.8%  5.0%  6.4%  6.5%  7.3%  7.9%
## 4  47.8  NA  NA  5.0%  4.3%  4.1%  3.4%  3.5%  3.2%  0.8%  0.6%  2.6%  1.8%
## 5  49.1  NA  NA  5.7%  4.9%  4.6%  3.7%  3.9%  3.6%  3.0%  2.7%  2.6%  2.0%
## 6   7.5  NA  NA  3.6%  5.1%  4.2%  4.6%  7.0%  7.2%  9.4%  9.3% 10.4% 11.2%
##   NOV.2 DEC.2 AVG.1 X.2
## 1  1.6%  1.1%        NA
## 2  1.8%  1.3%        NA
## 3  2.5%  2.8%        NA
## 4  1.5%  1.1%        NA
## 5  1.8%  1.3%        NA
## 6  4.1%  5.1%        NA

remove other columns

nycemp <- nycemp %>%
  select(-19:-47)

#fill the average colu,m

nycemp <- nycemp %>%
  rowwise() %>%
  mutate(ANNUAL = ifelse(is.na(ANNUAL), mean(c_across(JAN:DEC), na.rm = TRUE), ANNUAL)) %>%
  ungroup()

head(nycemp)
## # A tibble: 6 × 18
##    AREA  YEAR SERIESCODE AREANAME   INDUSTRY_TITLE   JAN   FEB   MAR   APR   MAY
##   <int> <int>      <int> <chr>      <chr>          <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 93561  2023          0 New York … Total Nonfarm  4575. 4623. 4650. 4648. 4680.
## 2 93561  2023    5000000 New York … Total Private  4004. 4052. 4075. 4076. 4108.
## 3 93561  2023    6000000 New York … Goods Produci…  198.  202.  204.  207.  211 
## 4 93561  2023    7000000 New York … Service-Provi… 4377. 4421. 4446. 4441. 4468.
## 5 93561  2023    8000000 New York … Private Servi… 3806. 3850. 3872. 3870. 3897.
## 6 93561  2023   15000000 New York … Mining, Loggi…  142.  146.  146.  149.  154.
## # ℹ 8 more variables: JUN <dbl>, JUL <dbl>, AUG <dbl>, SEP <dbl>, OCT <dbl>,
## #   NOV <dbl>, DEC <dbl>, ANNUAL <dbl>

pivot wider to make the Years into columns

nycemp_year <- nycemp %>%
  group_by(INDUSTRY_TITLE, AREANAME, YEAR) %>%
  summarise(ANNUAL = mean(ANNUAL)) %>%
  select(INDUSTRY_TITLE, AREANAME, YEAR, ANNUAL)
## `summarise()` has grouped output by 'INDUSTRY_TITLE', 'AREANAME'. You can
## override using the `.groups` argument.
nycemp_filtered <- nycemp_year %>%
  pivot_wider(names_from = YEAR, values_from = ANNUAL) %>%
  mutate_all(~ replace(., is.na(.), 0)) %>%
  select(INDUSTRY_TITLE, AREANAME, order(as.numeric(colnames(.))))  
## `mutate_all()` ignored the following grouping variables:
## • Columns `INDUSTRY_TITLE`, `AREANAME`
## ℹ Use `mutate_at(df, vars(-group_cols()), myoperation)` to silence the message.
## Warning in order(as.numeric(colnames(.))): NAs introduced by coercion

calculate the average job growth rate for each industry in the past 10 years.

last_10_years <- nycemp_filtered %>%
  select(INDUSTRY_TITLE, AREANAME, `2014`:`2023`)

# Calculate percentage changes for each year
percentage_changes <- last_10_years %>%
  mutate(
    Percentage_Change_2015 = ((`2015` - `2014`) / `2014`) * 100,
    Percentage_Change_2016 = ((`2016` - `2015`) / `2015`) * 100,
    Percentage_Change_2017 = ((`2017` - `2016`) / `2016`) * 100,
    Percentage_Change_2018 = ((`2018` - `2017`) / `2017`) * 100,
    Percentage_Change_2019 = ((`2019` - `2018`) / `2018`) * 100,
    Percentage_Change_2020 = ((`2020` - `2019`) / `2019`) * 100,
    Percentage_Change_2021 = ((`2021` - `2020`) / `2020`) * 100,
    Percentage_Change_2022 = ((`2022` - `2021`) / `2021`) * 100,
    Percentage_Change_2023 = ((`2023` - `2022`) / `2022`) * 100
  ) %>%
  select(-matches("^\\d{4}$"))  # Remove original columns

sort by highest grwoth in 2023

highest_growth_2023 <- percentage_changes %>%
  filter(Percentage_Change_2023 == max(Percentage_Change_2023)) %>%
  arrange(desc(Percentage_Change_2023))



glimpse(percentage_changes)
## Rows: 122
## Columns: 11
## Groups: INDUSTRY_TITLE, AREANAME [122]
## $ INDUSTRY_TITLE         <chr> "Accommodation", "Accommodation and Food Servic…
## $ AREANAME               <chr> "New York City", "New York City", "New York Cit…
## $ Percentage_Change_2015 <dbl> 1.1857708, 5.1650367, 7.9497908, 3.5517759, 3.6…
## $ Percentage_Change_2016 <dbl> 2.3437500, 2.9061319, 5.2325581, 3.1400966, 3.0…
## $ Percentage_Change_2017 <dbl> 0.9541985, 3.9819260, 0.5524862, 2.9976581, 2.8…
## $ Percentage_Change_2018 <dbl> 0.3780718, 0.6518197, -1.6483516, 6.4574807, 6.…
## $ Percentage_Change_2019 <dbl> 0.5649718, 0.6475985, 3.1657356, 6.6211021, 6.3…
## $ Percentage_Change_2020 <dbl> -46.067416, -41.715818, -3.429603, -12.620192, …
## $ Percentage_Change_2021 <dbl> -3.4722222, 13.3394664, 4.2990654, 3.1636864, 2…
## $ Percentage_Change_2022 <dbl> 42.805755, 31.939935, 3.942652, 9.377778, 9.373…
## $ Percentage_Change_2023 <dbl> 12.8253568, 9.3227725, -2.9022989, 1.9233374, 2…
percentage_changes <- percentage_changes %>%
  rename_with(~ gsub("Percentage_Change_", "", .), starts_with("Percentage_Change_"))

Home Health care services have the least violatile job growth in the last 10 years

top_5_industries <- highest_growth_2023$INDUSTRY_TITLE[1:5]
top_5_growth <- percentage_changes %>%
  filter(INDUSTRY_TITLE %in% top_5_industries) %>%
  pivot_longer(cols = starts_with("2"), 
               names_to = "Year", 
               values_to = "Percentage_Change")

# Plot
ggplot(top_5_growth, aes(x = Year, y = Percentage_Change, group = INDUSTRY_TITLE, color = INDUSTRY_TITLE)) +
  geom_line() +
  geom_point() +
  labs(title = "Top 5 Industries with Highest Growth Percentage (2014-2023)",
       x = "Year",
       y = "Percentage Change") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))