# Angel Gallardo

Analyze NYC monthly jobs by industry for the last 34 years. Which industries provide the most stability? What is the growth by industry for the last 10 years?

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)

# Load the raw NYC employment history from the GitHub-hosted CSV.
# The monthly figures contain thousands separators (e.g. "4,575.1"), so they
# come in as text and are converted to numbers in a later step.
nyc_jobs_url <-
  "https://raw.githubusercontent.com/Angelogallardo05/Week6/main/nychist.csv"
nycemp <- read.csv(file = nyc_jobs_url)

# Preview the first six rows to check the column layout.
head(nycemp, n = 6L)

##    AREA YEAR SERIESCODE      AREANAME                   INDUSTRY_TITLE     JAN
## 1 93561 2023          0 New York City                    Total Nonfarm 4,575.1
## 2 93561 2023    5000000 New York City                    Total Private 4,004.1
## 3 93561 2023    6000000 New York City                  Goods Producing   197.7
## 4 93561 2023    7000000 New York City                Service-Providing 4,377.4
## 5 93561 2023    8000000 New York City        Private Service Providing 3,806.4
## 6 93561 2023   15000000 New York City Mining, Logging and Construction   141.6
##       FEB     MAR     APR     MAY     JUN     JUL     AUG     SEP     OCT
## 1 4,623.3 4,649.7 4,647.8 4,679.5 4,695.2 4,681.4 4,665.2 4,690.1 4,725.7
## 2 4,052.3 4,075.3 4,076.5 4,107.7 4,126.6 4,103.1 4,088.7 4,113.5 4,149.3
## 3   202.5   203.8   206.6   211.0   213.8   215.1   215.4   217.5   219.5
## 4 4,420.8 4,445.9 4,441.2 4,468.5 4,481.4 4,466.3 4,449.8 4,472.6 4,506.2
## 5 3,849.8 3,871.5 3,869.9 3,896.7 3,912.8 3,888.0 3,873.3 3,896.0 3,929.8
## 6   145.6   146.4   149.2   153.5   155.7   157.7   157.9   159.8   161.7
##       NOV     DEC ANNUAL  X JAN.1 FEB.1 MAR.1 APR.1 MAY.1 JUN.1 JUL.1 AUG.1
## 1 4,743.9 4,756.6        NA 215.4 190.2 181.0 150.6 159.6 149.1  50.4  39.7
## 2 4,171.2 4,181.5        NA 211.0 185.9 176.6 145.7 155.0 144.8 127.6 116.8
## 3   212.0   210.5        NA   5.3   7.2   6.4   6.4   9.6  10.1  13.0  13.2
## 4 4,531.9 4,546.1        NA 210.1 183.0 174.6 144.2 150.0 139.0  37.4  26.5
## 5 3,959.2 3,971.0        NA 205.7 178.7 170.2 139.3 145.4 134.7 114.6 103.6
## 6   154.4   153.7        NA   4.9   7.0   5.9   6.6  10.0  10.4  13.6  13.5
##   SEP.1 OCT.1 NOV.1 DEC.1 AVG X.1 JAN.2 FEB.2 MAR.2 APR.2 MAY.2 JUN.2 JUL.2
## 1 127.0  95.9  72.6  53.6  NA  NA  4.9%  4.3%  4.1%  3.3%  3.5%  3.3%  1.1%
## 2 112.3  91.2  73.6  54.9  NA  NA  5.6%  4.8%  4.5%  3.7%  3.9%  3.6%  3.2%
## 3  14.8  16.0   5.1   5.8  NA  NA  2.8%  3.7%  3.2%  3.2%  4.8%  5.0%  6.4%
## 4 112.2  79.9  67.5  47.8  NA  NA  5.0%  4.3%  4.1%  3.4%  3.5%  3.2%  0.8%
## 5  97.5  75.2  68.5  49.1  NA  NA  5.7%  4.9%  4.6%  3.7%  3.9%  3.6%  3.0%
## 6  15.0  16.3   6.1   7.5  NA  NA  3.6%  5.1%  4.2%  4.6%  7.0%  7.2%  9.4%
##   AUG.2 SEP.2 OCT.2 NOV.2 DEC.2 AVG.1 X.2
## 1  0.9%  2.8%  2.1%  1.6%  1.1%        NA
## 2  2.9%  2.8%  2.2%  1.8%  1.3%        NA
## 3  6.5%  7.3%  7.9%  2.5%  2.8%        NA
## 4  0.6%  2.6%  1.8%  1.5%  1.1%        NA
## 5  2.7%  2.6%  2.0%  1.8%  1.3%        NA
## 6  9.3% 10.4% 11.2%  4.1%  5.1%        NA

Convert the columns that contain numbers to numeric.

# Convert the monthly (JAN..DEC) and ANNUAL columns from text to numbers.
# The values carry thousands separators ("4,575.1"), so the commas are
# stripped before coercion; blank ANNUAL cells become NA.
#
# The range is written with plain column names. Selecting with
# `starts_with("JAN"):ends_with("ANNUAL")` puts several matches (JAN, JAN.1,
# JAN.2) on the left-hand side of `:`, which only works because R silently
# keeps the first one and raises a warning.
nycemp <- nycemp %>%
  mutate(
    across(JAN:ANNUAL, ~ as.numeric(gsub(",", "", .x, fixed = TRUE)))
  )

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(...)`.
## Caused by warning in `x:y`:
## ! numerical expression has 3 elements: only the first used

# Re-inspect the first rows after the numeric conversion of JAN..ANNUAL.
head(nycemp)

##    AREA YEAR SERIESCODE      AREANAME                   INDUSTRY_TITLE    JAN
## 1 93561 2023          0 New York City                    Total Nonfarm 4575.1
## 2 93561 2023    5000000 New York City                    Total Private 4004.1
## 3 93561 2023    6000000 New York City                  Goods Producing  197.7
## 4 93561 2023    7000000 New York City                Service-Providing 4377.4
## 5 93561 2023    8000000 New York City        Private Service Providing 3806.4
## 6 93561 2023   15000000 New York City Mining, Logging and Construction  141.6
##      FEB    MAR    APR    MAY    JUN    JUL    AUG    SEP    OCT    NOV    DEC
## 1 4623.3 4649.7 4647.8 4679.5 4695.2 4681.4 4665.2 4690.1 4725.7 4743.9 4756.6
## 2 4052.3 4075.3 4076.5 4107.7 4126.6 4103.1 4088.7 4113.5 4149.3 4171.2 4181.5
## 3  202.5  203.8  206.6  211.0  213.8  215.1  215.4  217.5  219.5  212.0  210.5
## 4 4420.8 4445.9 4441.2 4468.5 4481.4 4466.3 4449.8 4472.6 4506.2 4531.9 4546.1
## 5 3849.8 3871.5 3869.9 3896.7 3912.8 3888.0 3873.3 3896.0 3929.8 3959.2 3971.0
## 6  145.6  146.4  149.2  153.5  155.7  157.7  157.9  159.8  161.7  154.4  153.7
##   ANNUAL  X JAN.1 FEB.1 MAR.1 APR.1 MAY.1 JUN.1 JUL.1 AUG.1 SEP.1 OCT.1 NOV.1
## 1     NA NA 215.4 190.2 181.0 150.6 159.6 149.1  50.4  39.7 127.0  95.9  72.6
## 2     NA NA 211.0 185.9 176.6 145.7 155.0 144.8 127.6 116.8 112.3  91.2  73.6
## 3     NA NA   5.3   7.2   6.4   6.4   9.6  10.1  13.0  13.2  14.8  16.0   5.1
## 4     NA NA 210.1 183.0 174.6 144.2 150.0 139.0  37.4  26.5 112.2  79.9  67.5
## 5     NA NA 205.7 178.7 170.2 139.3 145.4 134.7 114.6 103.6  97.5  75.2  68.5
## 6     NA NA   4.9   7.0   5.9   6.6  10.0  10.4  13.6  13.5  15.0  16.3   6.1
##   DEC.1 AVG X.1 JAN.2 FEB.2 MAR.2 APR.2 MAY.2 JUN.2 JUL.2 AUG.2 SEP.2 OCT.2
## 1  53.6  NA  NA  4.9%  4.3%  4.1%  3.3%  3.5%  3.3%  1.1%  0.9%  2.8%  2.1%
## 2  54.9  NA  NA  5.6%  4.8%  4.5%  3.7%  3.9%  3.6%  3.2%  2.9%  2.8%  2.2%
## 3   5.8  NA  NA  2.8%  3.7%  3.2%  3.2%  4.8%  5.0%  6.4%  6.5%  7.3%  7.9%
## 4  47.8  NA  NA  5.0%  4.3%  4.1%  3.4%  3.5%  3.2%  0.8%  0.6%  2.6%  1.8%
## 5  49.1  NA  NA  5.7%  4.9%  4.6%  3.7%  3.9%  3.6%  3.0%  2.7%  2.6%  2.0%
## 6   7.5  NA  NA  3.6%  5.1%  4.2%  4.6%  7.0%  7.2%  9.4%  9.3% 10.4% 11.2%
##   NOV.2 DEC.2 AVG.1 X.2
## 1  1.6%  1.1%        NA
## 2  1.8%  1.3%        NA
## 3  2.5%  2.8%        NA
## 4  1.5%  1.1%        NA
## 5  1.8%  1.3%        NA
## 6  4.1%  5.1%        NA

Remove the other columns, keeping only the identifier, monthly, and ANNUAL columns.

# Keep only the identifier columns, the twelve monthly values and ANNUAL
# (the first 18 columns of the file). Selecting the range by name instead of
# dropping positions 19 to 47 keeps this step correct if the source CSV gains
# or loses trailing columns.
nycemp <- nycemp %>%
  select(AREA:ANNUAL)

# Fill the ANNUAL (average) column where it is missing

# Fill missing ANNUAL values with the mean of that row's monthly figures
# (ignoring missing months); rows that already have ANNUAL keep it.
# rowMeans() computes every row mean in one vectorised call, which avoids the
# per-row overhead of rowwise() + c_across(). as_tibble() keeps the result a
# tibble, as the rowwise()/ungroup() round trip did.
nycemp <- nycemp %>%
  as_tibble() %>%
  mutate(
    ANNUAL = if_else(
      is.na(ANNUAL),
      rowMeans(pick(JAN:DEC), na.rm = TRUE),
      ANNUAL
    )
  )

# Preview the trimmed table now that ANNUAL has been filled in.
head(nycemp)

## # A tibble: 6 × 18
##    AREA  YEAR SERIESCODE AREANAME   INDUSTRY_TITLE   JAN   FEB   MAR   APR   MAY
##   <int> <int>      <int> <chr>      <chr>          <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 93561  2023          0 New York … Total Nonfarm  4575. 4623. 4650. 4648. 4680.
## 2 93561  2023    5000000 New York … Total Private  4004. 4052. 4075. 4076. 4108.
## 3 93561  2023    6000000 New York … Goods Produci…  198.  202.  204.  207.  211 
## 4 93561  2023    7000000 New York … Service-Provi… 4377. 4421. 4446. 4441. 4468.
## 5 93561  2023    8000000 New York … Private Servi… 3806. 3850. 3872. 3870. 3897.
## 6 93561  2023   15000000 New York … Mining, Loggi…  142.  146.  146.  149.  154.
## # ℹ 8 more variables: JUN <dbl>, JUL <dbl>, AUG <dbl>, SEP <dbl>, OCT <dbl>,
## #   NOV <dbl>, DEC <dbl>, ANNUAL <dbl>

Pivot wider so that each year becomes its own column.

# Collapse to one ANNUAL value per industry, area and year.
# `.groups = "drop_last"` spells out what summarise() does by default: the
# result stays grouped by INDUSTRY_TITLE and AREANAME. Stating it explicitly
# silences the "grouped output" message, and the grouping is kept on purpose
# because later steps in this file run on the grouped table.
# summarise() already returns exactly INDUSTRY_TITLE, AREANAME, YEAR, ANNUAL in
# that order, so no further select() is needed.
nycemp_year <- nycemp %>%
  group_by(INDUSTRY_TITLE, AREANAME, YEAR) %>%
  summarise(ANNUAL = mean(ANNUAL), .groups = "drop_last")

## `summarise()` has grouped output by 'INDUSTRY_TITLE', 'AREANAME'. You can
## override using the `.groups` argument.

# Reshape to one row per industry/area with one column per year.
# `names_sort = TRUE` orders the year columns ascending, replacing the
# order(as.numeric(colnames(.))) trick, which coerced the non-year column
# names to NA and raised a warning.
# Missing values are then set to 0 in every numeric (year) column; across()
# skips the grouping columns by itself, unlike the superseded mutate_all().
# NOTE: a 0 placeholder makes any growth rate computed from that year come out
# as Inf or NaN, so consumers of this table must handle non-finite values.
nycemp_filtered <- nycemp_year %>%
  pivot_wider(
    names_from = YEAR,
    values_from = ANNUAL,
    names_sort = TRUE
  ) %>%
  mutate(across(where(is.numeric), ~ replace(.x, is.na(.x), 0)))

## `mutate_all()` ignored the following grouping variables:
## • Columns `INDUSTRY_TITLE`, `AREANAME`
## ℹ Use `mutate_at(df, vars(-group_cols()), myoperation)` to silence the message.

## Warning in order(as.numeric(colnames(.))): NAs introduced by coercion

Calculate the year-over-year job growth rate for each industry over the past 10 years.

# Restrict the wide table to the ten most recent year columns (2014 through
# 2023), keeping the two identifier columns alongside them.
last_10_years <- select(
  nycemp_filtered,
  INDUSTRY_TITLE, AREANAME, `2014`:`2023`
)

# Growth of `current` relative to `previous`, expressed in percent.
pct_change <- function(current, previous) {
  ((current - previous) / previous) * 100
}

# Build one growth column per year from 2015 to 2023, each measured against
# the year before it, then drop the raw four-digit year columns so only the
# identifiers and the Percentage_Change_* columns remain.
percentage_changes <- last_10_years %>%
  mutate(
    Percentage_Change_2015 = pct_change(`2015`, `2014`),
    Percentage_Change_2016 = pct_change(`2016`, `2015`),
    Percentage_Change_2017 = pct_change(`2017`, `2016`),
    Percentage_Change_2018 = pct_change(`2018`, `2017`),
    Percentage_Change_2019 = pct_change(`2019`, `2018`),
    Percentage_Change_2020 = pct_change(`2020`, `2019`),
    Percentage_Change_2021 = pct_change(`2021`, `2020`),
    Percentage_Change_2022 = pct_change(`2022`, `2021`),
    Percentage_Change_2023 = pct_change(`2023`, `2022`)
  ) %>%
  select(-matches("^\\d{4}$"))

Sort by highest growth in 2023.

# Rank industries by their 2023 growth rate, highest first.
# The table is ungrouped first so the ranking never depends on grouping left
# over from earlier steps. A `filter(x == max(x))` keeps every row only while
# each group holds exactly one row, and it keeps a single row once the data
# is ungrouped.
# Non-finite rates (NaN/Inf, produced when the previous year's value is the 0
# placeholder for missing data) are removed so they cannot top the ranking.
highest_growth_2023 <- percentage_changes %>%
  ungroup() %>%
  filter(is.finite(Percentage_Change_2023)) %>%
  arrange(desc(Percentage_Change_2023))



# Show the structure of the growth table: column names, types, first values.
glimpse(percentage_changes)

## Rows: 122
## Columns: 11
## Groups: INDUSTRY_TITLE, AREANAME [122]
## $ INDUSTRY_TITLE         <chr> "Accommodation", "Accommodation and Food Servic…
## $ AREANAME               <chr> "New York City", "New York City", "New York Cit…
## $ Percentage_Change_2015 <dbl> 1.1857708, 5.1650367, 7.9497908, 3.5517759, 3.6…
## $ Percentage_Change_2016 <dbl> 2.3437500, 2.9061319, 5.2325581, 3.1400966, 3.0…
## $ Percentage_Change_2017 <dbl> 0.9541985, 3.9819260, 0.5524862, 2.9976581, 2.8…
## $ Percentage_Change_2018 <dbl> 0.3780718, 0.6518197, -1.6483516, 6.4574807, 6.…
## $ Percentage_Change_2019 <dbl> 0.5649718, 0.6475985, 3.1657356, 6.6211021, 6.3…
## $ Percentage_Change_2020 <dbl> -46.067416, -41.715818, -3.429603, -12.620192, …
## $ Percentage_Change_2021 <dbl> -3.4722222, 13.3394664, 4.2990654, 3.1636864, 2…
## $ Percentage_Change_2022 <dbl> 42.805755, 31.939935, 3.942652, 9.377778, 9.373…
## $ Percentage_Change_2023 <dbl> 12.8253568, 9.3227725, -2.9022989, 1.9233374, 2…

# Shorten the growth column names to the bare year ("Percentage_Change_2015"
# becomes "2015") so they can serve directly as axis labels.
percentage_changes <- rename_with(
  percentage_changes,
  .fn = function(col_name) gsub("Percentage_Change_", "", col_name),
  .cols = starts_with("Percentage_Change_")
)

Home health care services have the least volatile job growth in the last 10 years.

# Names of the five industries ranked first in `highest_growth_2023`.
# head() returns fewer names when fewer than five rows exist, whereas `[1:5]`
# would pad the vector with NA.
top_5_industries <- head(highest_growth_2023$INDUSTRY_TITLE, 5)

# Long format for plotting: one row per industry and year.
# The year columns are matched as exactly four digits rather than "starts with
# 2", so no other column can be picked up by accident. Year stays a character
# column, so a plot will treat it as a discrete axis.
top_5_growth <- percentage_changes %>%
  filter(INDUSTRY_TITLE %in% top_5_industries) %>%
  pivot_longer(
    cols = matches("^\\d{4}$"),
    names_to = "Year",
    values_to = "Percentage_Change"
  )

# Line chart of yearly growth for the selected industries: one coloured line
# (with point markers) per industry, and x-axis labels tilted 45 degrees.
ggplot(
  data = top_5_growth,
  mapping = aes(
    x = Year,
    y = Percentage_Change,
    group = INDUSTRY_TITLE,
    color = INDUSTRY_TITLE
  )
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Top 5 Industries with Highest Growth Percentage (2014-2023)",
    x = "Year",
    y = "Percentage Change"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

HW6 1of3 DATA607

Angel Gallardo

2024-03-02

Analyze NYC monthly jobs by industry for the last 34 years. Which industries provide the most stability? What is the growth by industry for the last 10 years?

Convert the columns that contain numbers to numeric.

Remove the other columns, keeping only the identifier, monthly, and ANNUAL columns.

Pivot wider so that each year becomes its own column.

Calculate the year-over-year job growth rate for each industry over the past 10 years.

Sort by highest growth in 2023.

Home health care services have the least volatile job growth in the last 10 years.