Overview

The data set used here is from the article “The (Very) Long Tail of Hurricane Recovery”.
https://projects.fivethirtyeight.com/sandy-311/

Questions:

  Try to improve on the code in Diana’s approach for determining the top 5 agencies for each year, and
  Try to improve on her plot that shows “which agencies were involved with the 311 calls each year (limiting it to only the top 5 agencies per year).”
## This is to pull csv into R dataframe
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
# Read the Sandy-related 311 call counts CSV straight from GitHub into a tibble.
sandy_data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/dianaplunkett/607/main/sandy-311-calls-by-day.csv"
)
## Rows: 1783 Columns: 25
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (1): date
## dbl (24): NYC-3-1-1, ACS, BPSI, CAU, CHALL, DEP, DOB, DOE, DOF, DOHMH, DPR, ...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(sandy_data)
## # A tibble: 6 x 25
##   date   `NYC-3-1-1`   ACS  BPSI   CAU CHALL   DEP   DOB   DOE   DOF DOHMH   DPR
##   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10/22~           0     0     0     0     0     0     0     1     0     0     0
## 2 10/23~           0     0     0     0     0     0     0     1     0     0     0
## 3 10/24~           0     0     0     0     0     0     0     1     0     0     0
## 4 10/25~           0     0     0     0     0     0     0     4     0     0     0
## 5 10/26~           0     0     0     0     0     0     0    36     0     0     0
## 6 10/27~         207     0     0     0     0     0     0   312     0     0     0
## # ... with 13 more variables: FEMA <dbl>, HPD <dbl>, HRA <dbl>, MFANYC <dbl>,
## #   MOSE <dbl>, NYCEM <dbl>, NYCHA <dbl>, NYCSERVICE <dbl>, NYPD <dbl>,
## #   NYSDOL <dbl>, SBS <dbl>, NYSEMERGENCYMG <dbl>, total <dbl>
##glimpse(sandy_data)

This is to break date column into Month, Day and Year columns

library(tidyr)
# Split the `date` string on "/" into Month, Day and Year columns.
# `remove = FALSE` keeps the original `date` column alongside the new ones.
sandy_data <- sandy_data %>%
  tidyr::separate(
    col = date,
    into = c("Month", "Day", "Year"),
    sep = "/",
    remove = FALSE
  )
head(sandy_data)
## # A tibble: 6 x 28
##   date   Month Day   Year  `NYC-3-1-1`   ACS  BPSI   CAU CHALL   DEP   DOB   DOE
##   <chr>  <chr> <chr> <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10/22~ 10    22    12              0     0     0     0     0     0     0     1
## 2 10/23~ 10    23    12              0     0     0     0     0     0     0     1
## 3 10/24~ 10    24    12              0     0     0     0     0     0     0     1
## 4 10/25~ 10    25    12              0     0     0     0     0     0     0     4
## 5 10/26~ 10    26    12              0     0     0     0     0     0     0    36
## 6 10/27~ 10    27    12            207     0     0     0     0     0     0   312
## # ... with 16 more variables: DOF <dbl>, DOHMH <dbl>, DPR <dbl>, FEMA <dbl>,
## #   HPD <dbl>, HRA <dbl>, MFANYC <dbl>, MOSE <dbl>, NYCEM <dbl>, NYCHA <dbl>,
## #   NYCSERVICE <dbl>, NYPD <dbl>, NYSDOL <dbl>, SBS <dbl>,
## #   NYSEMERGENCYMG <dbl>, total <dbl>
## This is to pull required columns only (I removed the date, Month, Day and total columns)

library(tidyr)
# Columns to keep: the year plus one count column per agency.
keep_cols <- c(
  "Year", "NYC-3-1-1", "ACS", "BPSI", "CAU", "CHALL", "DEP", "DOB", "DOE",
  "DOF", "DOHMH", "DPR", "FEMA", "HPD", "HRA", "MFANYC", "MOSE", "NYCEM",
  "NYCHA", "NYCSERVICE", "NYPD", "NYSDOL", "SBS", "NYSEMERGENCYMG"
)
sandy_data <- sandy_data[, keep_cols]

head(sandy_data)
## # A tibble: 6 x 24
##   Year  `NYC-3-1-1`   ACS  BPSI   CAU CHALL   DEP   DOB   DOE   DOF DOHMH   DPR
##   <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12              0     0     0     0     0     0     0     1     0     0     0
## 2 12              0     0     0     0     0     0     0     1     0     0     0
## 3 12              0     0     0     0     0     0     0     1     0     0     0
## 4 12              0     0     0     0     0     0     0     4     0     0     0
## 5 12              0     0     0     0     0     0     0    36     0     0     0
## 6 12            207     0     0     0     0     0     0   312     0     0     0
## # ... with 12 more variables: FEMA <dbl>, HPD <dbl>, HRA <dbl>, MFANYC <dbl>,
## #   MOSE <dbl>, NYCEM <dbl>, NYCHA <dbl>, NYCSERVICE <dbl>, NYPD <dbl>,
## #   NYSDOL <dbl>, SBS <dbl>, NYSEMERGENCYMG <dbl>

This is to update the NYC-3-1-1 column name into NYC311 just to keep it clean and consistent

# Rename only the `NYC-3-1-1` column to `NYC311`, looking it up by name.
# Reassigning every column name by position would silently mislabel the data
# if the column order ever changed; all other names are left untouched here.
names(sandy_data)[names(sandy_data) == "NYC-3-1-1"] <- "NYC311"

head(sandy_data)
## # A tibble: 6 x 24
##   Year  NYC311   ACS  BPSI   CAU CHALL   DEP   DOB   DOE   DOF DOHMH   DPR  FEMA
##   <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12         0     0     0     0     0     0     0     1     0     0     0     0
## 2 12         0     0     0     0     0     0     0     1     0     0     0     0
## 3 12         0     0     0     0     0     0     0     1     0     0     0     0
## 4 12         0     0     0     0     0     0     0     4     0     0     0     0
## 5 12         0     0     0     0     0     0     0    36     0     0     0     0
## 6 12       207     0     0     0     0     0     0   312     0     0     0     0
## # ... with 11 more variables: HPD <dbl>, HRA <dbl>, MFANYC <dbl>, MOSE <dbl>,
## #   NYCEM <dbl>, NYCHA <dbl>, NYCSERVICE <dbl>, NYPD <dbl>, NYSDOL <dbl>,
## #   SBS <dbl>, NYSEMERGENCYMG <dbl>

This is to summarize the data in the dataframe by year. The `<-` stores the summarized result in a new dataframe called `data`; the original `sandy_data` is left unchanged.

# Total calls per agency for each year: every numeric column is summed within
# each `Year` group, ignoring missing values.
# `across(where(...))` replaces the superseded `summarise_if()`;
# `.groups = "drop"` makes it explicit that the result is ungrouped.
data <- sandy_data %>%
  group_by(Year) %>%
  summarise(
    across(where(is.numeric), function(x) sum(x, na.rm = TRUE)),
    .groups = "drop"
  )

data
## # A tibble: 6 x 24
##   Year  NYC311   ACS  BPSI   CAU CHALL   DEP   DOB   DOE   DOF DOHMH   DPR  FEMA
##   <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 12     28295    30   152  1371  6925    60  1934 12597  1076   197   572     0
## 2 13         0    22    19    38  3519   108   844    53  1821   348     0     0
## 3 14         0     0     0     0   196    26    91     0   472    52     0     0
## 4 15         1     0     0     0     8    15    57     0   170    10     0    10
## 5 16         2     0     0     0    16     0    39     0    46     8     0     0
## 6 17         1     0     0     0     7     0    20     0    14    16     0     0
## # ... with 11 more variables: HPD <dbl>, HRA <dbl>, MFANYC <dbl>, MOSE <dbl>,
## #   NYCEM <dbl>, NYCHA <dbl>, NYCSERVICE <dbl>, NYPD <dbl>, NYSDOL <dbl>,
## #   SBS <dbl>, NYSEMERGENCYMG <dbl>

This is to pull the agency that has the highest call volume in each year

# Name of the agency with the most calls in each year (one entry per row of
# `data`; ties resolve to the first column, as which.max() does).
# Only the agency count columns are searched. Passing the whole data frame to
# apply() would coerce it to a character matrix because of the character `Year`
# column, and the year value itself would take part in the comparison.
agency_counts <- as.matrix(data[, setdiff(colnames(data), "Year")])
colnames(agency_counts)[apply(agency_counts, 1, which.max)]
## [1] "NYC311"         "CHALL"          "NYSEMERGENCYMG" "NYSEMERGENCYMG"
## [5] "NYSEMERGENCYMG" "NYSEMERGENCYMG"
## This code was originally producing warnings, so I had to wrap it in suppressWarnings(). It now runs without any warnings; I am not sure what resolved them.
##suppressWarnings(colnames(data)[apply(data,1,which.max)])

This is to view the number of calls the top agency received in each year

##install.packages("matrixStats")
library(matrixStats)
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
# Highest yearly call count received by any single agency, one value per year.
# The agency columns are selected by excluding `Year` by name rather than by a
# hard-coded list of positions, so this keeps working if agencies are added or
# removed.
data1 <- rowMaxs(as.matrix(data[, setdiff(colnames(data), "Year")]))
data1
## [1] 28295  3519   519   271   120    69

This is to transpose the columns and rows from the data dataframe

# Transpose the yearly summary so that agencies become rows (as row names) and
# years become columns.
# `data[[1]]` extracts the Year values as a plain character vector; `data[, 1]`
# on a tibble stays a one-column tibble, which setNames() cannot use as a
# vector of column names.
# The two-digit years are prefixed with "20" to give four-digit labels
# (assumes every year in the data falls in the 2000s).
tdata <- setNames(
  data.frame(t(data[, -1])),
  paste0("20", data[[1]])
)

head(tdata)
##         2012 2013 2014 2015 2016 2017
## NYC311 28295    0    0    1    2    1
## ACS       30   22    0    0    0    0
## BPSI     152   19    0    0    0    0
## CAU     1371   38    0    0    0    0
## CHALL   6925 3519  196    8   16    7
## DEP       60  108   26   15    0    0
tdata
##                 2012 2013 2014 2015 2016 2017
## NYC311         28295    0    0    1    2    1
## ACS               30   22    0    0    0    0
## BPSI             152   19    0    0    0    0
## CAU             1371   38    0    0    0    0
## CHALL           6925 3519  196    8   16    7
## DEP               60  108   26   15    0    0
## DOB             1934  844   91   57   39   20
## DOE            12597   53    0    0    0    0
## DOF             1076 1821  472  170   46   14
## DOHMH            197  348   52   10    8   16
## DPR              572    0    0    0    0    0
## FEMA               0    0    0   10    0    0
## HPD               85 1382    0    0    0    0
## HRA             1380    0    0    0    0    0
## MFANYC          1164    0    0    0    0    0
## MOSE              70  164    9    0    0    0
## NYCEM           1229  149   24    0    0    0
## NYCHA            135   57    0    0    0    0
## NYCSERVICE       473  932   40    0    0    0
## NYPD              91  115   35   21   18   12
## NYSDOL          4490  638   16    0    0    0
## SBS              524  715   47    8    0    3
## NYSEMERGENCYMG     0 1895  519  271  120   69

Conclusion:

I tried to plot the graph but was not able to. The transpose of the data did not go as expected, and I was not able to name the first column, so I had issues with the plot.